1 /*-------------------------------------------------------------------------
4 * This file contains functions for encoding conversion.
6 * The string-conversion functions in this file share some API quirks.
9 * The functions return a palloc'd, null-terminated string if conversion
10 * is required. However, if no conversion is performed, the given source
11 * string pointer is returned as-is.
13 * Although the presence of a length argument means that callers can pass
14 * non-null-terminated strings, care is required because the same string
15 * will be passed back if no conversion occurs. Such callers *must* check
16 * whether result == src and handle that case differently.
18 * If the source and destination encodings are the same, the source string
19 * is returned without any verification; it's assumed to be valid data.
20 * If that might not be the case, the caller is responsible for validating
21 * the string using a separate call to pg_verify_mbstr(). Whenever the
22 * source and destination encodings are different, the functions ensure that
23 * the result is validly encoded according to the destination encoding.
26 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
31 * src/backend/utils/mb/mbutils.c
33 *-------------------------------------------------------------------------
37 #include "access/xact.h"
38 #include "catalog/namespace.h"
39 #include "mb/pg_wchar.h"
40 #include "utils/builtins.h"
41 #include "utils/memutils.h"
42 #include "utils/syscache.h"
46 * We maintain a simple linked list caching the fmgr lookup info for the
47 * currently selected conversion functions, as well as any that have been
48 * selected previously in the current session. (We remember previous
49 * settings because we must be able to restore a previous setting during
50 * transaction rollback, without doing any fresh catalog accesses.)
52 * Since we'll never release this data, we just keep it in TopMemoryContext.
54 typedef struct ConvProcInfo
56 int s_encoding
; /* server and client encoding IDs */
58 FmgrInfo to_server_info
; /* lookup info for conversion procs */
59 FmgrInfo to_client_info
;
62 static List
*ConvProcList
= NIL
; /* List of ConvProcInfo */
65 * These variables point to the currently active conversion functions,
66 * or are NULL when no conversion is needed.
68 static FmgrInfo
*ToServerConvProc
= NULL
;
69 static FmgrInfo
*ToClientConvProc
= NULL
;
72 * This variable stores the conversion function to convert from UTF-8
73 * to the server encoding. It's NULL if the server encoding *is* UTF-8,
74 * or if we lack a conversion function for this.
76 static FmgrInfo
*Utf8ToServerConvProc
= NULL
;
79 * These variables track the currently-selected encodings.
81 static const pg_enc2name
*ClientEncoding
= &pg_enc2name_tbl
[PG_SQL_ASCII
];
82 static const pg_enc2name
*DatabaseEncoding
= &pg_enc2name_tbl
[PG_SQL_ASCII
];
83 static const pg_enc2name
*MessageEncoding
= &pg_enc2name_tbl
[PG_SQL_ASCII
];
86 * During backend startup we can't set client encoding because we (a)
87 * can't look up the conversion functions, and (b) may not know the database
88 * encoding yet either. So SetClientEncoding() just accepts anything and
89 * remembers it for InitializeClientEncoding() to apply later.
91 static bool backend_startup_complete
= false;
92 static int pending_client_encoding
= PG_SQL_ASCII
;
95 /* Internal functions */
96 static char *perform_default_encoding_conversion(const char *src
,
97 int len
, bool is_client_to_server
);
98 static int cliplen(const char *str
, int len
, int limit
);
102 * Prepare for a future call to SetClientEncoding. Success should mean
103 * that SetClientEncoding is guaranteed to succeed for this encoding request.
105 * (But note that success before backend_startup_complete does not guarantee
108 * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
111 PrepareClientEncoding(int encoding
)
113 int current_server_encoding
;
116 if (!PG_VALID_FE_ENCODING(encoding
))
119 /* Can't do anything during startup, per notes above */
120 if (!backend_startup_complete
)
123 current_server_encoding
= GetDatabaseEncoding();
126 * Check for cases that require no conversion function.
128 if (current_server_encoding
== encoding
||
129 current_server_encoding
== PG_SQL_ASCII
||
130 encoding
== PG_SQL_ASCII
)
133 if (IsTransactionState())
136 * If we're in a live transaction, it's safe to access the catalogs,
137 * so look up the functions. We repeat the lookup even if the info is
138 * already cached, so that we can react to changes in the contents of
143 ConvProcInfo
*convinfo
;
144 MemoryContext oldcontext
;
146 to_server_proc
= FindDefaultConversionProc(encoding
,
147 current_server_encoding
);
148 if (!OidIsValid(to_server_proc
))
150 to_client_proc
= FindDefaultConversionProc(current_server_encoding
,
152 if (!OidIsValid(to_client_proc
))
156 * Load the fmgr info into TopMemoryContext (could still fail here)
158 convinfo
= (ConvProcInfo
*) MemoryContextAlloc(TopMemoryContext
,
159 sizeof(ConvProcInfo
));
160 convinfo
->s_encoding
= current_server_encoding
;
161 convinfo
->c_encoding
= encoding
;
162 fmgr_info_cxt(to_server_proc
, &convinfo
->to_server_info
,
164 fmgr_info_cxt(to_client_proc
, &convinfo
->to_client_info
,
167 /* Attach new info to head of list */
168 oldcontext
= MemoryContextSwitchTo(TopMemoryContext
);
169 ConvProcList
= lcons(convinfo
, ConvProcList
);
170 MemoryContextSwitchTo(oldcontext
);
173 * We cannot yet remove any older entry for the same encoding pair,
174 * since it could still be in use. SetClientEncoding will clean up.
177 return 0; /* success */
182 * If we're not in a live transaction, the only thing we can do is
183 * restore a previous setting using the cache. This covers all
184 * transaction-rollback cases. The only case it might not work for is
185 * trying to change client_encoding on the fly by editing
186 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
187 * thing to do anyway.
189 foreach(lc
, ConvProcList
)
191 ConvProcInfo
*oldinfo
= (ConvProcInfo
*) lfirst(lc
);
193 if (oldinfo
->s_encoding
== current_server_encoding
&&
194 oldinfo
->c_encoding
== encoding
)
198 return -1; /* it's not cached, so fail */
203 * Set the active client encoding and set up the conversion-function pointers.
204 * PrepareClientEncoding should have been called previously for this encoding.
206 * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
209 SetClientEncoding(int encoding
)
211 int current_server_encoding
;
215 if (!PG_VALID_FE_ENCODING(encoding
))
218 /* Can't do anything during startup, per notes above */
219 if (!backend_startup_complete
)
221 pending_client_encoding
= encoding
;
225 current_server_encoding
= GetDatabaseEncoding();
228 * Check for cases that require no conversion function.
230 if (current_server_encoding
== encoding
||
231 current_server_encoding
== PG_SQL_ASCII
||
232 encoding
== PG_SQL_ASCII
)
234 ClientEncoding
= &pg_enc2name_tbl
[encoding
];
235 ToServerConvProc
= NULL
;
236 ToClientConvProc
= NULL
;
241 * Search the cache for the entry previously prepared by
242 * PrepareClientEncoding; if there isn't one, we lose. While at it,
243 * release any duplicate entries so that repeated Prepare/Set cycles don't
247 foreach(lc
, ConvProcList
)
249 ConvProcInfo
*convinfo
= (ConvProcInfo
*) lfirst(lc
);
251 if (convinfo
->s_encoding
== current_server_encoding
&&
252 convinfo
->c_encoding
== encoding
)
256 /* Found newest entry, so set up */
257 ClientEncoding
= &pg_enc2name_tbl
[encoding
];
258 ToServerConvProc
= &convinfo
->to_server_info
;
259 ToClientConvProc
= &convinfo
->to_client_info
;
264 /* Duplicate entry, release it */
265 ConvProcList
= foreach_delete_current(ConvProcList
, lc
);
272 return 0; /* success */
274 return -1; /* it's not cached, so fail */
278 * Initialize client encoding conversions.
279 * Called from InitPostgres() once during backend startup.
282 InitializeClientEncoding(void)
284 int current_server_encoding
;
286 Assert(!backend_startup_complete
);
287 backend_startup_complete
= true;
289 if (PrepareClientEncoding(pending_client_encoding
) < 0 ||
290 SetClientEncoding(pending_client_encoding
) < 0)
293 * Oops, the requested conversion is not available. We couldn't fail
294 * before, but we can now.
297 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
298 errmsg("conversion between %s and %s is not supported",
299 pg_enc2name_tbl
[pending_client_encoding
].name
,
300 GetDatabaseEncodingName())));
304 * Also look up the UTF8-to-server conversion function if needed. Since
305 * the server encoding is fixed within any one backend process, we don't
306 * have to do this more than once.
308 current_server_encoding
= GetDatabaseEncoding();
309 if (current_server_encoding
!= PG_UTF8
&&
310 current_server_encoding
!= PG_SQL_ASCII
)
312 Oid utf8_to_server_proc
;
314 Assert(IsTransactionState());
315 utf8_to_server_proc
=
316 FindDefaultConversionProc(PG_UTF8
,
317 current_server_encoding
);
318 /* If there's no such conversion, just leave the pointer as NULL */
319 if (OidIsValid(utf8_to_server_proc
))
323 finfo
= (FmgrInfo
*) MemoryContextAlloc(TopMemoryContext
,
325 fmgr_info_cxt(utf8_to_server_proc
, finfo
,
327 /* Set Utf8ToServerConvProc only after data is fully valid */
328 Utf8ToServerConvProc
= finfo
;
334 * returns the current client encoding
337 pg_get_client_encoding(void)
339 return ClientEncoding
->encoding
;
343 * returns the current client encoding name
346 pg_get_client_encoding_name(void)
348 return ClientEncoding
->name
;
352 * Convert src string to another encoding (general case).
354 * See the notes about string conversion functions at the top of this file.
357 pg_do_encoding_conversion(unsigned char *src
, int len
,
358 int src_encoding
, int dest_encoding
)
360 unsigned char *result
;
364 return src
; /* empty string is always valid */
366 if (src_encoding
== dest_encoding
)
367 return src
; /* no conversion required, assume valid */
369 if (dest_encoding
== PG_SQL_ASCII
)
370 return src
; /* any string is valid in SQL_ASCII */
372 if (src_encoding
== PG_SQL_ASCII
)
374 /* No conversion is possible, but we must validate the result */
375 (void) pg_verify_mbstr(dest_encoding
, (const char *) src
, len
, false);
379 if (!IsTransactionState()) /* shouldn't happen */
380 elog(ERROR
, "cannot perform encoding conversion outside a transaction");
382 proc
= FindDefaultConversionProc(src_encoding
, dest_encoding
);
383 if (!OidIsValid(proc
))
385 (errcode(ERRCODE_UNDEFINED_FUNCTION
),
386 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
387 pg_encoding_to_char(src_encoding
),
388 pg_encoding_to_char(dest_encoding
))));
391 * Allocate space for conversion result, being wary of integer overflow.
393 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
394 * required space, so it might exceed MaxAllocSize even though the result
395 * would actually fit. We do not want to hand back a result string that
396 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
397 * if we just allocate more than that, and don't use it, that's fine.
399 if ((Size
) len
>= (MaxAllocHugeSize
/ (Size
) MAX_CONVERSION_GROWTH
))
401 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
402 errmsg("out of memory"),
403 errdetail("String of %d bytes is too long for encoding conversion.",
406 result
= (unsigned char *)
407 MemoryContextAllocHuge(CurrentMemoryContext
,
408 (Size
) len
* MAX_CONVERSION_GROWTH
+ 1);
410 (void) OidFunctionCall6(proc
,
411 Int32GetDatum(src_encoding
),
412 Int32GetDatum(dest_encoding
),
413 CStringGetDatum((char *) src
),
414 CStringGetDatum((char *) result
),
416 BoolGetDatum(false));
419 * If the result is large, it's worth repalloc'ing to release any extra
420 * space we asked for. The cutoff here is somewhat arbitrary, but we
421 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
425 Size resultlen
= strlen((char *) result
);
427 if (resultlen
>= MaxAllocSize
)
429 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
430 errmsg("out of memory"),
431 errdetail("String of %d bytes is too long for encoding conversion.",
434 result
= (unsigned char *) repalloc(result
, resultlen
+ 1);
441 * Convert src string to another encoding.
443 * This function has a different API than the other conversion functions.
444 * The caller should've looked up the conversion function using
445 * FindDefaultConversionProc(). Unlike the other functions, the converted
446 * result is not palloc'd. It is written to the caller-supplied buffer
449 * src_encoding - encoding to convert from
450 * dest_encoding - encoding to convert to
451 * src, srclen - input buffer and its length in bytes
452 * dest, destlen - destination buffer and its size in bytes
454 * The output is null-terminated.
456 * If destlen < srclen * MAX_CONVERSION_INPUT_LENGTH + 1, the converted output
457 * wouldn't necessarily fit in the output buffer, and the function will not
458 * convert the whole input.
460 * TODO: The conversion function interface is not great. Firstly, it
461 * would be nice to pass through the destination buffer size to the
462 * conversion function, so that if you pass a shorter destination buffer, it
463 * could still continue to fill up the whole buffer. Currently, we have to
464 * assume worst case expansion and stop the conversion short, even if there
465 * is in fact space left in the destination buffer. Secondly, it would be
466 * nice to return the number of bytes written to the caller, to avoid a call
470 pg_do_encoding_conversion_buf(Oid proc
,
473 unsigned char *src
, int srclen
,
474 unsigned char *dest
, int destlen
,
480 * If the destination buffer is not large enough to hold the result in the
481 * worst case, limit the input size passed to the conversion function.
483 if ((Size
) srclen
>= ((destlen
- 1) / (Size
) MAX_CONVERSION_GROWTH
))
484 srclen
= ((destlen
- 1) / (Size
) MAX_CONVERSION_GROWTH
);
486 result
= OidFunctionCall6(proc
,
487 Int32GetDatum(src_encoding
),
488 Int32GetDatum(dest_encoding
),
489 CStringGetDatum((char *) src
),
490 CStringGetDatum((char *) dest
),
491 Int32GetDatum(srclen
),
492 BoolGetDatum(noError
));
493 return DatumGetInt32(result
);
497 * Convert string to encoding encoding_name. The source
498 * encoding is the DB encoding.
500 * BYTEA convert_to(TEXT string, NAME encoding_name) */
502 pg_convert_to(PG_FUNCTION_ARGS
)
504 Datum string
= PG_GETARG_DATUM(0);
505 Datum dest_encoding_name
= PG_GETARG_DATUM(1);
506 Datum src_encoding_name
= DirectFunctionCall1(namein
,
507 CStringGetDatum(DatabaseEncoding
->name
));
511 * pg_convert expects a bytea as its first argument. We're passing it a
512 * text argument here, relying on the fact that they are both in fact
513 * varlena types, and thus structurally identical.
515 result
= DirectFunctionCall3(pg_convert
, string
,
516 src_encoding_name
, dest_encoding_name
);
518 PG_RETURN_DATUM(result
);
522 * Convert string from encoding encoding_name. The destination
523 * encoding is the DB encoding.
525 * TEXT convert_from(BYTEA string, NAME encoding_name) */
527 pg_convert_from(PG_FUNCTION_ARGS
)
529 Datum string
= PG_GETARG_DATUM(0);
530 Datum src_encoding_name
= PG_GETARG_DATUM(1);
531 Datum dest_encoding_name
= DirectFunctionCall1(namein
,
532 CStringGetDatum(DatabaseEncoding
->name
));
535 result
= DirectFunctionCall3(pg_convert
, string
,
536 src_encoding_name
, dest_encoding_name
);
539 * pg_convert returns a bytea, which we in turn return as text, relying on
540 * the fact that they are both in fact varlena types, and thus
541 * structurally identical. Although not all bytea values are valid text,
542 * in this case it will be because we've told pg_convert to return one
543 * that is valid as text in the current database encoding.
545 PG_RETURN_DATUM(result
);
549 * Convert string between two arbitrary encodings.
551 * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
554 pg_convert(PG_FUNCTION_ARGS
)
556 bytea
*string
= PG_GETARG_BYTEA_PP(0);
557 char *src_encoding_name
= NameStr(*PG_GETARG_NAME(1));
558 int src_encoding
= pg_char_to_encoding(src_encoding_name
);
559 char *dest_encoding_name
= NameStr(*PG_GETARG_NAME(2));
560 int dest_encoding
= pg_char_to_encoding(dest_encoding_name
);
566 if (src_encoding
< 0)
568 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
569 errmsg("invalid source encoding name \"%s\"",
570 src_encoding_name
)));
571 if (dest_encoding
< 0)
573 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
574 errmsg("invalid destination encoding name \"%s\"",
575 dest_encoding_name
)));
577 /* make sure that source string is valid */
578 len
= VARSIZE_ANY_EXHDR(string
);
579 src_str
= VARDATA_ANY(string
);
580 (void) pg_verify_mbstr(src_encoding
, src_str
, len
, false);
582 /* perform conversion */
583 dest_str
= (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str
),
588 /* update len if conversion actually happened */
589 if (dest_str
!= src_str
)
590 len
= strlen(dest_str
);
593 * build bytea data type structure.
595 retval
= (bytea
*) palloc(len
+ VARHDRSZ
);
596 SET_VARSIZE(retval
, len
+ VARHDRSZ
);
597 memcpy(VARDATA(retval
), dest_str
, len
);
599 if (dest_str
!= src_str
)
602 /* free memory if allocated by the toaster */
603 PG_FREE_IF_COPY(string
, 0);
605 PG_RETURN_BYTEA_P(retval
);
609 * get the length of the string considered as text in the specified
610 * encoding. Raises an error if the data is not valid in that
613 * INT4 length (BYTEA string, NAME src_encoding_name)
616 length_in_encoding(PG_FUNCTION_ARGS
)
618 bytea
*string
= PG_GETARG_BYTEA_PP(0);
619 char *src_encoding_name
= NameStr(*PG_GETARG_NAME(1));
620 int src_encoding
= pg_char_to_encoding(src_encoding_name
);
625 if (src_encoding
< 0)
627 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
628 errmsg("invalid encoding name \"%s\"",
629 src_encoding_name
)));
631 len
= VARSIZE_ANY_EXHDR(string
);
632 src_str
= VARDATA_ANY(string
);
634 retval
= pg_verify_mbstr_len(src_encoding
, src_str
, len
, false);
636 PG_RETURN_INT32(retval
);
640 * Get maximum multibyte character length in the specified encoding.
642 * Note encoding is specified numerically, not by name as above.
645 pg_encoding_max_length_sql(PG_FUNCTION_ARGS
)
647 int encoding
= PG_GETARG_INT32(0);
649 if (PG_VALID_ENCODING(encoding
))
650 PG_RETURN_INT32(pg_wchar_table
[encoding
].maxmblen
);
656 * Convert client encoding to server encoding.
658 * See the notes about string conversion functions at the top of this file.
661 pg_client_to_server(const char *s
, int len
)
663 return pg_any_to_server(s
, len
, ClientEncoding
->encoding
);
667 * Convert any encoding to server encoding.
669 * See the notes about string conversion functions at the top of this file.
671 * Unlike the other string conversion functions, this will apply validation
672 * even if encoding == DatabaseEncoding->encoding. This is because this is
673 * used to process data coming in from outside the database, and we never
674 * want to just assume validity.
677 pg_any_to_server(const char *s
, int len
, int encoding
)
680 return unconstify(char *, s
); /* empty string is always valid */
682 if (encoding
== DatabaseEncoding
->encoding
||
683 encoding
== PG_SQL_ASCII
)
686 * No conversion is needed, but we must still validate the data.
688 (void) pg_verify_mbstr(DatabaseEncoding
->encoding
, s
, len
, false);
689 return unconstify(char *, s
);
692 if (DatabaseEncoding
->encoding
== PG_SQL_ASCII
)
695 * No conversion is possible, but we must still validate the data,
696 * because the client-side code might have done string escaping using
697 * the selected client_encoding. If the client encoding is ASCII-safe
698 * then we just do a straight validation under that encoding. For an
699 * ASCII-unsafe encoding we have a problem: we dare not pass such data
700 * to the parser but we have no way to convert it. We compromise by
701 * rejecting the data if it contains any non-ASCII characters.
703 if (PG_VALID_BE_ENCODING(encoding
))
704 (void) pg_verify_mbstr(encoding
, s
, len
, false);
709 for (i
= 0; i
< len
; i
++)
711 if (s
[i
] == '\0' || IS_HIGHBIT_SET(s
[i
]))
713 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
714 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
715 pg_enc2name_tbl
[PG_SQL_ASCII
].name
,
716 (unsigned char) s
[i
])));
719 return unconstify(char *, s
);
722 /* Fast path if we can use cached conversion function */
723 if (encoding
== ClientEncoding
->encoding
)
724 return perform_default_encoding_conversion(s
, len
, true);
726 /* General case ... will not work outside transactions */
727 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s
),
730 DatabaseEncoding
->encoding
);
734 * Convert server encoding to client encoding.
736 * See the notes about string conversion functions at the top of this file.
739 pg_server_to_client(const char *s
, int len
)
741 return pg_server_to_any(s
, len
, ClientEncoding
->encoding
);
745 * Convert server encoding to any encoding.
747 * See the notes about string conversion functions at the top of this file.
750 pg_server_to_any(const char *s
, int len
, int encoding
)
753 return unconstify(char *, s
); /* empty string is always valid */
755 if (encoding
== DatabaseEncoding
->encoding
||
756 encoding
== PG_SQL_ASCII
)
757 return unconstify(char *, s
); /* assume data is valid */
759 if (DatabaseEncoding
->encoding
== PG_SQL_ASCII
)
761 /* No conversion is possible, but we must validate the result */
762 (void) pg_verify_mbstr(encoding
, s
, len
, false);
763 return unconstify(char *, s
);
766 /* Fast path if we can use cached conversion function */
767 if (encoding
== ClientEncoding
->encoding
)
768 return perform_default_encoding_conversion(s
, len
, false);
770 /* General case ... will not work outside transactions */
771 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s
),
773 DatabaseEncoding
->encoding
,
778 * Perform default encoding conversion using cached FmgrInfo. Since
779 * this function does not access database at all, it is safe to call
780 * outside transactions. If the conversion has not been set up by
781 * SetClientEncoding(), no conversion is performed.
784 perform_default_encoding_conversion(const char *src
, int len
,
785 bool is_client_to_server
)
792 if (is_client_to_server
)
794 src_encoding
= ClientEncoding
->encoding
;
795 dest_encoding
= DatabaseEncoding
->encoding
;
796 flinfo
= ToServerConvProc
;
800 src_encoding
= DatabaseEncoding
->encoding
;
801 dest_encoding
= ClientEncoding
->encoding
;
802 flinfo
= ToClientConvProc
;
806 return unconstify(char *, src
);
809 * Allocate space for conversion result, being wary of integer overflow.
810 * See comments in pg_do_encoding_conversion.
812 if ((Size
) len
>= (MaxAllocHugeSize
/ (Size
) MAX_CONVERSION_GROWTH
))
814 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
815 errmsg("out of memory"),
816 errdetail("String of %d bytes is too long for encoding conversion.",
820 MemoryContextAllocHuge(CurrentMemoryContext
,
821 (Size
) len
* MAX_CONVERSION_GROWTH
+ 1);
823 FunctionCall6(flinfo
,
824 Int32GetDatum(src_encoding
),
825 Int32GetDatum(dest_encoding
),
826 CStringGetDatum(src
),
827 CStringGetDatum(result
),
829 BoolGetDatum(false));
832 * Release extra space if there might be a lot --- see comments in
833 * pg_do_encoding_conversion.
837 Size resultlen
= strlen(result
);
839 if (resultlen
>= MaxAllocSize
)
841 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
842 errmsg("out of memory"),
843 errdetail("String of %d bytes is too long for encoding conversion.",
846 result
= (char *) repalloc(result
, resultlen
+ 1);
853 * Convert a single Unicode code point into a string in the server encoding.
855 * The code point given by "c" is converted and stored at *s, which must
856 * have at least MAX_UNICODE_EQUIVALENT_STRING+1 bytes available.
857 * The output will have a trailing '\0'. Throws error if the conversion
858 * cannot be performed.
860 * Note that this relies on having previously looked up any required
861 * conversion function. That's partly for speed but mostly because the parser
862 * may call this outside any transaction, or in an aborted transaction.
865 pg_unicode_to_server(pg_wchar c
, unsigned char *s
)
867 unsigned char c_as_utf8
[MAX_MULTIBYTE_CHAR_LEN
+ 1];
872 * Complain if invalid Unicode code point. The choice of errcode here is
873 * debatable, but really our caller should have checked this anyway.
875 if (!is_valid_unicode_codepoint(c
))
877 (errcode(ERRCODE_SYNTAX_ERROR
),
878 errmsg("invalid Unicode code point")));
880 /* Otherwise, if it's in ASCII range, conversion is trivial */
883 s
[0] = (unsigned char) c
;
888 /* If the server encoding is UTF-8, we just need to reformat the code */
889 server_encoding
= GetDatabaseEncoding();
890 if (server_encoding
== PG_UTF8
)
892 unicode_to_utf8(c
, s
);
893 s
[pg_utf_mblen(s
)] = '\0';
897 /* For all other cases, we must have a conversion function available */
898 if (Utf8ToServerConvProc
== NULL
)
900 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
901 errmsg("conversion between %s and %s is not supported",
902 pg_enc2name_tbl
[PG_UTF8
].name
,
903 GetDatabaseEncodingName())));
905 /* Construct UTF-8 source string */
906 unicode_to_utf8(c
, c_as_utf8
);
907 c_as_utf8_len
= pg_utf_mblen(c_as_utf8
);
908 c_as_utf8
[c_as_utf8_len
] = '\0';
910 /* Convert, or throw error if we can't */
911 FunctionCall6(Utf8ToServerConvProc
,
912 Int32GetDatum(PG_UTF8
),
913 Int32GetDatum(server_encoding
),
914 CStringGetDatum((char *) c_as_utf8
),
915 CStringGetDatum((char *) s
),
916 Int32GetDatum(c_as_utf8_len
),
917 BoolGetDatum(false));
921 * Convert a single Unicode code point into a string in the server encoding.
923 * Same as pg_unicode_to_server(), except that we don't throw errors,
924 * but simply return false on conversion failure.
927 pg_unicode_to_server_noerror(pg_wchar c
, unsigned char *s
)
929 unsigned char c_as_utf8
[MAX_MULTIBYTE_CHAR_LEN
+ 1];
934 /* Fail if invalid Unicode code point */
935 if (!is_valid_unicode_codepoint(c
))
938 /* Otherwise, if it's in ASCII range, conversion is trivial */
941 s
[0] = (unsigned char) c
;
946 /* If the server encoding is UTF-8, we just need to reformat the code */
947 server_encoding
= GetDatabaseEncoding();
948 if (server_encoding
== PG_UTF8
)
950 unicode_to_utf8(c
, s
);
951 s
[pg_utf_mblen(s
)] = '\0';
955 /* For all other cases, we must have a conversion function available */
956 if (Utf8ToServerConvProc
== NULL
)
959 /* Construct UTF-8 source string */
960 unicode_to_utf8(c
, c_as_utf8
);
961 c_as_utf8_len
= pg_utf_mblen(c_as_utf8
);
962 c_as_utf8
[c_as_utf8_len
] = '\0';
964 /* Convert, but without throwing error if we can't */
965 converted_len
= DatumGetInt32(FunctionCall6(Utf8ToServerConvProc
,
966 Int32GetDatum(PG_UTF8
),
967 Int32GetDatum(server_encoding
),
968 CStringGetDatum((char *) c_as_utf8
),
969 CStringGetDatum((char *) s
),
970 Int32GetDatum(c_as_utf8_len
),
971 BoolGetDatum(true)));
973 /* Conversion was successful iff it consumed the whole input */
974 return (converted_len
== c_as_utf8_len
);
978 /* convert a multibyte string to a wchar */
980 pg_mb2wchar(const char *from
, pg_wchar
*to
)
982 return pg_wchar_table
[DatabaseEncoding
->encoding
].mb2wchar_with_len((const unsigned char *) from
, to
, strlen(from
));
985 /* convert a multibyte string to a wchar with a limited length */
987 pg_mb2wchar_with_len(const char *from
, pg_wchar
*to
, int len
)
989 return pg_wchar_table
[DatabaseEncoding
->encoding
].mb2wchar_with_len((const unsigned char *) from
, to
, len
);
992 /* same, with any encoding */
994 pg_encoding_mb2wchar_with_len(int encoding
,
995 const char *from
, pg_wchar
*to
, int len
)
997 return pg_wchar_table
[encoding
].mb2wchar_with_len((const unsigned char *) from
, to
, len
);
1000 /* convert a wchar string to a multibyte */
1002 pg_wchar2mb(const pg_wchar
*from
, char *to
)
1004 return pg_wchar_table
[DatabaseEncoding
->encoding
].wchar2mb_with_len(from
, (unsigned char *) to
, pg_wchar_strlen(from
));
1007 /* convert a wchar string to a multibyte with a limited length */
1009 pg_wchar2mb_with_len(const pg_wchar
*from
, char *to
, int len
)
1011 return pg_wchar_table
[DatabaseEncoding
->encoding
].wchar2mb_with_len(from
, (unsigned char *) to
, len
);
1014 /* same, with any encoding */
1016 pg_encoding_wchar2mb_with_len(int encoding
,
1017 const pg_wchar
*from
, char *to
, int len
)
1019 return pg_wchar_table
[encoding
].wchar2mb_with_len(from
, (unsigned char *) to
, len
);
1022 /* returns the byte length of a multibyte character */
1024 pg_mblen(const char *mbstr
)
1026 return pg_wchar_table
[DatabaseEncoding
->encoding
].mblen((const unsigned char *) mbstr
);
1029 /* returns the display length of a multibyte character */
1031 pg_dsplen(const char *mbstr
)
1033 return pg_wchar_table
[DatabaseEncoding
->encoding
].dsplen((const unsigned char *) mbstr
);
/* returns the length (counted in wchars) of a multibyte string */
int
pg_mbstrlen(const char *mbstr)
{
	int			len = 0;

	/* optimization for single byte encoding */
	if (pg_database_encoding_max_length() == 1)
		return strlen(mbstr);

	/* otherwise step through the string one character at a time */
	while (*mbstr)
	{
		mbstr += pg_mblen(mbstr);
		len++;
	}
	return len;
}
/* returns the length (counted in wchars) of a multibyte string
 * (not necessarily NULL terminated)
 *
 * "limit" is the number of bytes of mbstr to examine.
 */
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
{
	int			len = 0;

	/* optimization for single byte encoding */
	if (pg_database_encoding_max_length() == 1)
		return limit;

	while (limit > 0 && *mbstr)
	{
		int			l = pg_mblen(mbstr);

		limit -= l;
		mbstr += l;
		len++;
	}
	return len;
}
1078 * returns the byte length of a multibyte string
1079 * (not necessarily NULL terminated)
1080 * that is no longer than limit.
1081 * this function does not break multibyte character boundary.
1084 pg_mbcliplen(const char *mbstr
, int len
, int limit
)
1086 return pg_encoding_mbcliplen(DatabaseEncoding
->encoding
, mbstr
,
1091 * pg_mbcliplen with specified encoding
1094 pg_encoding_mbcliplen(int encoding
, const char *mbstr
,
1097 mblen_converter mblen_fn
;
1101 /* optimization for single byte encoding */
1102 if (pg_encoding_max_length(encoding
) == 1)
1103 return cliplen(mbstr
, len
, limit
);
1105 mblen_fn
= pg_wchar_table
[encoding
].mblen
;
1107 while (len
> 0 && *mbstr
)
1109 l
= (*mblen_fn
) ((const unsigned char *) mbstr
);
1110 if ((clen
+ l
) > limit
)
/*
 * Similar to pg_mbcliplen except the limit parameter specifies the
 * character length, not the byte length.
 */
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
	int			clen = 0;
	int			nch = 0;
	int			l;

	/* optimization for single byte encoding */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	while (len > 0 && *mbstr)
	{
		l = pg_mblen(mbstr);
		nch++;
		/* stop once the character budget is used up */
		if (nch > limit)
			break;
		clen += l;
		len -= l;
		mbstr += l;
	}
	return clen;
}
/*
 * mbcliplen for any single-byte encoding
 *
 * Returns the number of bytes of str to keep: at most the smaller of len
 * and limit, and stopping early at an embedded null byte.
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			l = 0;

	/* never look past either bound */
	if (limit < len)
		len = limit;
	while (l < len && str[l])
		l++;
	return l;
}
1162 SetDatabaseEncoding(int encoding
)
1164 if (!PG_VALID_BE_ENCODING(encoding
))
1165 elog(ERROR
, "invalid database encoding: %d", encoding
);
1167 DatabaseEncoding
= &pg_enc2name_tbl
[encoding
];
1168 Assert(DatabaseEncoding
->encoding
== encoding
);
1172 SetMessageEncoding(int encoding
)
1174 /* Some calls happen before we can elog()! */
1175 Assert(PG_VALID_ENCODING(encoding
));
1177 MessageEncoding
= &pg_enc2name_tbl
[encoding
];
1178 Assert(MessageEncoding
->encoding
== encoding
);
/* NOTE(review): NLS guard restored from context after extraction damage -- confirm */
#ifdef ENABLE_NLS
/*
 * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
 * codeset.  Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
 * fail for gettext-internal causes like out-of-memory.
 *
 * Returns true on success, false otherwise.
 */
static bool
raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
{
	/* elog() is only usable once a memory context exists */
	bool		elog_ok = (CurrentMemoryContext != NULL);
	int			i;

	for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
	{
		if (pg_enc2gettext_tbl[i].encoding == encoding)
		{
			if (bind_textdomain_codeset(domainname,
										pg_enc2gettext_tbl[i].name) != NULL)
				return true;

			if (elog_ok)
				elog(LOG, "bind_textdomain_codeset failed");
			else
				write_stderr("bind_textdomain_codeset failed");

			break;
		}
	}

	return false;
}
#endif							/* ENABLE_NLS */
#ifdef ENABLE_NLS
/*
 * Bind a gettext message domain to the codeset corresponding to the database
 * encoding.  For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
 * Return the MessageEncoding implied by the new settings.
 *
 * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
 * When that matches the database encoding, we don't need to do anything.  In
 * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
 * database encoding, except for the C locale.  (On Windows, we also permit a
 * discrepancy under the UTF8 encoding.)  For the C locale, explicitly bind
 * gettext to the right codeset.
 *
 * On Windows, gettext defaults to the Windows ANSI code page.  This is a
 * convenient departure for software that passes the strings to Windows ANSI
 * APIs, but we don't do that.  Compel gettext to use database encoding or,
 * failing that, the LC_CTYPE encoding as it would on other platforms.
 *
 * This function is called before elog() and palloc() are usable.
 */
int
pg_bind_textdomain_codeset(const char *domainname)
{
	/* elog() cannot be used until a memory context exists */
	bool		elog_ok = (CurrentMemoryContext != NULL);
	int			encoding = GetDatabaseEncoding();
	int			new_msgenc;

#ifndef WIN32
	const char *ctype = setlocale(LC_CTYPE, NULL);

	/* elsewhere than Windows, only the C/POSIX locale needs an explicit bind */
	if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
#endif
		if (encoding != PG_SQL_ASCII &&
			raw_pg_bind_textdomain_codeset(domainname, encoding))
			return encoding;

	/* otherwise fall back to whatever LC_CTYPE implies */
	new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
	if (new_msgenc < 0)
		new_msgenc = PG_SQL_ASCII;

#ifdef WIN32
	if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
		/* On failure, the old message encoding remains valid. */
		return GetMessageEncoding();
#endif

	return new_msgenc;
}
#endif							/* ENABLE_NLS */
1263 * The database encoding, also called the server encoding, represents the
1264 * encoding of data stored in text-like data types. Affected types include
1265 * cstring, text, varchar, name, xml, and json.
1268 GetDatabaseEncoding(void)
1270 return DatabaseEncoding
->encoding
;
1274 GetDatabaseEncodingName(void)
1276 return DatabaseEncoding
->name
;
1280 getdatabaseencoding(PG_FUNCTION_ARGS
)
1282 return DirectFunctionCall1(namein
, CStringGetDatum(DatabaseEncoding
->name
));
1286 pg_client_encoding(PG_FUNCTION_ARGS
)
1288 return DirectFunctionCall1(namein
, CStringGetDatum(ClientEncoding
->name
));
1292 PG_char_to_encoding(PG_FUNCTION_ARGS
)
1294 Name s
= PG_GETARG_NAME(0);
1296 PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s
)));
1300 PG_encoding_to_char(PG_FUNCTION_ARGS
)
1302 int32 encoding
= PG_GETARG_INT32(0);
1303 const char *encoding_name
= pg_encoding_to_char(encoding
);
1305 return DirectFunctionCall1(namein
, CStringGetDatum(encoding_name
));
1309 * gettext() returns messages in this encoding. This often matches the
1310 * database encoding, but it differs for SQL_ASCII databases, for processes
1311 * not attached to a database, and under a database encoding lacking iconv
1312 * support (MULE_INTERNAL).
1315 GetMessageEncoding(void)
1317 return MessageEncoding
->encoding
;
1322 * Generic character incrementer function.
1324 * Not knowing anything about the properties of the encoding in use, we just
1325 * keep incrementing the last byte until we get a validly-encoded result,
1326 * or we run out of values to try. We don't bother to try incrementing
1327 * higher-order bytes, so there's no growth in runtime for wider characters.
1328 * (If we did try to do that, we'd need to consider the likelihood that 255
1329 * is not a valid final byte in the encoding.)
1332 pg_generic_charinc(unsigned char *charptr
, int len
)
1334 unsigned char *lastbyte
= charptr
+ len
- 1;
1335 mbchar_verifier mbverify
;
1337 /* We can just invoke the character verifier directly. */
1338 mbverify
= pg_wchar_table
[GetDatabaseEncoding()].mbverifychar
;
1340 while (*lastbyte
< (unsigned char) 255)
1343 if ((*mbverify
) (charptr
, len
) == len
)
/*
 * UTF-8 character incrementer function.
 *
 * For a one-byte character less than 0x7F, we just increment the byte.
 *
 * For a multibyte character, every byte but the first must fall between 0x80
 * and 0xBF; and the first byte must be between 0xC0 and 0xF4.  We increment
 * the last byte that's not already at its maximum value.  If we can't find a
 * byte that's less than the maximum allowable value, we simply fail.  We also
 * need some special-case logic to skip regions used for surrogate pair
 * handling, as those should not occur in valid UTF-8.
 *
 * Note that we don't reset lower-order bytes back to their minimums, since
 * we can't afford to make an exhaustive search (see make_greater_string).
 *
 * The character at charptr ("length" bytes) is modified in place.  Returns
 * true on success, false if no byte could be incremented.
 */
static bool
pg_utf8_increment(unsigned char *charptr, int length)
{
	unsigned char a;
	unsigned char limit;

	switch (length)
	{
		default:
			/* reject lengths 5 and 6 for now */
			return false;
		case 4:
			a = charptr[3];
			if (a < 0xBF)
			{
				charptr[3]++;
				break;
			}
			/* FALL THRU */
		case 3:
			a = charptr[2];
			if (a < 0xBF)
			{
				charptr[2]++;
				break;
			}
			/* FALL THRU */
		case 2:
			a = charptr[1];

			/*
			 * The second byte's ceiling depends on the lead byte: 0xED must
			 * stay below the surrogate range, and 0xF4 must stay within the
			 * highest valid code point.
			 */
			switch (*charptr)
			{
				case 0xED:
					limit = 0x9F;
					break;
				case 0xF4:
					limit = 0x8F;
					break;
				default:
					limit = 0xBF;
					break;
			}
			if (a < limit)
			{
				charptr[1]++;
				break;
			}
			/* FALL THRU */
		case 1:
			a = *charptr;

			/* last lead byte of each length class: cannot go higher */
			if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
				return false;
			charptr[0]++;
			break;
	}

	return true;
}
1424 * EUC-JP character incrementer function.
1426 * If the sequence starts with SS2 (0x8e), it must be a two-byte sequence
1427 * representing JIS X 0201 characters with the second byte ranging between
1428 * 0xa1 and 0xdf. We just increment the last byte if it's less than 0xdf,
1429 * and otherwise rewrite the whole sequence to 0xa1 0xa1.
1431 * If the sequence starts with SS3 (0x8f), it must be a three-byte sequence
1432 * in which the last two bytes range between 0xa1 and 0xfe. The last byte
1433 * is incremented if possible, otherwise the second-to-last byte.
1435 * If the sequence starts with a value other than the above and its MSB
1436 * is set, it must be a two-byte sequence representing JIS X 0208 characters
1437 * with both bytes ranging between 0xa1 and 0xfe. The last byte is
1438 * incremented if possible, otherwise the second-to-last byte.
1440 * Otherwise, the sequence is a single-byte ASCII character. It is
1441 * incremented up to 0x7f.
1444 pg_eucjp_increment(unsigned char *charptr
, int length
)
1454 case SS2
: /* JIS X 0201 */
1461 charptr
[0] = charptr
[1] = 0xa1;
1468 case SS3
: /* JIS X 0212 */
1472 for (i
= 2; i
> 0; i
--)
1487 /* Out of 3-byte code region */
1491 if (IS_HIGHBIT_SET(c1
)) /* JIS X 0208? */
1496 for (i
= 1; i
>= 0; i
--)
1511 /* Out of 2 byte code region */
1515 { /* ASCII, single byte */
1527 * get the character incrementer for the encoding for the current database
1529 mbcharacter_incrementer
1530 pg_database_encoding_character_incrementer(void)
1533 * Eventually it might be best to add a field to pg_wchar_table[], but for
1534 * now we just use a switch.
1536 switch (GetDatabaseEncoding())
1539 return pg_utf8_increment
;
1542 return pg_eucjp_increment
;
1545 return pg_generic_charinc
;
1550 * fetch maximum length of the encoding for the current database
1553 pg_database_encoding_max_length(void)
1555 return pg_wchar_table
[GetDatabaseEncoding()].maxmblen
;
/*
 * Verify mbstr to make sure that it is validly encoded in the current
 * database encoding.  Otherwise same as pg_verify_mbstr().
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
	return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
}
1569 * Verify mbstr to make sure that it is validly encoded in the specified
1573 pg_verify_mbstr(int encoding
, const char *mbstr
, int len
, bool noError
)
1577 Assert(PG_VALID_ENCODING(encoding
));
1579 oklen
= pg_wchar_table
[encoding
].mbverifystr((const unsigned char *) mbstr
, len
);
1584 report_invalid_encoding(encoding
, mbstr
+ oklen
, len
- oklen
);
1590 * Verify mbstr to make sure that it is validly encoded in the specified
1593 * mbstr is not necessarily zero terminated; length of mbstr is
1596 * If OK, return length of string in the encoding.
1597 * If a problem is found, return -1 when noError is
1598 * true; when noError is false, ereport() a descriptive message.
1600 * Note: We cannot use the faster encoding-specific mbverifystr() function
1601 * here, because we need to count the number of characters in the string.
1604 pg_verify_mbstr_len(int encoding
, const char *mbstr
, int len
, bool noError
)
1606 mbchar_verifier mbverifychar
;
1609 Assert(PG_VALID_ENCODING(encoding
));
1612 * In single-byte encodings, we need only reject nulls (\0).
1614 if (pg_encoding_max_length(encoding
) <= 1)
1616 const char *nullpos
= memchr(mbstr
, 0, len
);
1618 if (nullpos
== NULL
)
1622 report_invalid_encoding(encoding
, nullpos
, 1);
1625 /* fetch function pointer just once */
1626 mbverifychar
= pg_wchar_table
[encoding
].mbverifychar
;
1634 /* fast path for ASCII-subset characters */
1635 if (!IS_HIGHBIT_SET(*mbstr
))
1646 report_invalid_encoding(encoding
, mbstr
, len
);
1649 l
= (*mbverifychar
) ((const unsigned char *) mbstr
, len
);
1655 report_invalid_encoding(encoding
, mbstr
, len
);
1666 * check_encoding_conversion_args: check arguments of a conversion function
1668 * "expected" arguments can be either an encoding ID or -1 to indicate that
1669 * the caller will check whether it accepts the ID.
1671 * Note: the errors here are not really user-facing, so elog instead of
1672 * ereport seems sufficient. Also, we trust that the "expected" encoding
1673 * arguments are valid encoding IDs, but we don't trust the actuals.
1676 check_encoding_conversion_args(int src_encoding
,
1679 int expected_src_encoding
,
1680 int expected_dest_encoding
)
1682 if (!PG_VALID_ENCODING(src_encoding
))
1683 elog(ERROR
, "invalid source encoding ID: %d", src_encoding
);
1684 if (src_encoding
!= expected_src_encoding
&& expected_src_encoding
>= 0)
1685 elog(ERROR
, "expected source encoding \"%s\", but got \"%s\"",
1686 pg_enc2name_tbl
[expected_src_encoding
].name
,
1687 pg_enc2name_tbl
[src_encoding
].name
);
1688 if (!PG_VALID_ENCODING(dest_encoding
))
1689 elog(ERROR
, "invalid destination encoding ID: %d", dest_encoding
);
1690 if (dest_encoding
!= expected_dest_encoding
&& expected_dest_encoding
>= 0)
1691 elog(ERROR
, "expected destination encoding \"%s\", but got \"%s\"",
1692 pg_enc2name_tbl
[expected_dest_encoding
].name
,
1693 pg_enc2name_tbl
[dest_encoding
].name
);
1695 elog(ERROR
, "encoding conversion length must not be negative");
1699 * report_invalid_encoding: complain about invalid multibyte character
1701 * note: len is remaining length of string, not length of character;
1702 * len must be greater than zero, as we always examine the first byte.
1705 report_invalid_encoding(int encoding
, const char *mbstr
, int len
)
1707 int l
= pg_encoding_mblen(encoding
, mbstr
);
1708 char buf
[8 * 5 + 1];
1713 jlimit
= Min(l
, len
);
1714 jlimit
= Min(jlimit
, 8); /* prevent buffer overrun */
1716 for (j
= 0; j
< jlimit
; j
++)
1718 p
+= sprintf(p
, "0x%02x", (unsigned char) mbstr
[j
]);
1720 p
+= sprintf(p
, " ");
1724 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
1725 errmsg("invalid byte sequence for encoding \"%s\": %s",
1726 pg_enc2name_tbl
[encoding
].name
,
1731 * report_untranslatable_char: complain about untranslatable character
1733 * note: len is remaining length of string, not length of character;
1734 * len must be greater than zero, as we always examine the first byte.
1737 report_untranslatable_char(int src_encoding
, int dest_encoding
,
1738 const char *mbstr
, int len
)
1740 int l
= pg_encoding_mblen(src_encoding
, mbstr
);
1741 char buf
[8 * 5 + 1];
1746 jlimit
= Min(l
, len
);
1747 jlimit
= Min(jlimit
, 8); /* prevent buffer overrun */
1749 for (j
= 0; j
< jlimit
; j
++)
1751 p
+= sprintf(p
, "0x%02x", (unsigned char) mbstr
[j
]);
1753 p
+= sprintf(p
, " ");
1757 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER
),
1758 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1760 pg_enc2name_tbl
[src_encoding
].name
,
1761 pg_enc2name_tbl
[dest_encoding
].name
)));
1767 * Convert from MessageEncoding to a palloc'ed, null-terminated utf16
1768 * string. The character length is also passed to utf16len if not
1769 * null. Returns NULL iff failed. Before MessageEncoding initialization, "str"
1770 * should be ASCII-only; this will function as though MessageEncoding is UTF8.
1773 pgwin32_message_to_UTF16(const char *str
, int len
, int *utf16len
)
1775 int msgenc
= GetMessageEncoding();
1780 if (msgenc
== PG_SQL_ASCII
)
1781 /* No conversion is possible, and SQL_ASCII is never utf16. */
1784 codepage
= pg_enc2name_tbl
[msgenc
].codepage
;
1787 * Use MultiByteToWideChar directly if there is a corresponding codepage,
1788 * or double conversion through UTF8 if not. Double conversion is needed,
1789 * for example, in an ENCODING=LATIN8, LC_CTYPE=C database.
1793 utf16
= (WCHAR
*) palloc(sizeof(WCHAR
) * (len
+ 1));
1794 dstlen
= MultiByteToWideChar(codepage
, 0, str
, len
, utf16
, len
);
1795 utf16
[dstlen
] = (WCHAR
) 0;
1802 * XXX pg_do_encoding_conversion() requires a transaction. In the
1803 * absence of one, hope for the input to be valid UTF8.
1805 if (IsTransactionState())
1807 utf8
= (char *) pg_do_encoding_conversion((unsigned char *) str
,
1815 utf8
= (char *) str
;
1817 utf16
= (WCHAR
*) palloc(sizeof(WCHAR
) * (len
+ 1));
1818 dstlen
= MultiByteToWideChar(CP_UTF8
, 0, utf8
, len
, utf16
, len
);
1819 utf16
[dstlen
] = (WCHAR
) 0;
1825 if (dstlen
== 0 && len
> 0)
1828 return NULL
; /* error */