1 /*-------------------------------------------------------------------------
3 * EUC_JIS_2004, SHIFT_JIS_2004
5 * Copyright (c) 2007-2022, PostgreSQL Global Development Group
8 * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
10 *-------------------------------------------------------------------------
15 #include "mb/pg_wchar.h"
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004
);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004
);
22 static int euc_jis_20042shift_jis_2004(const unsigned char *euc
, unsigned char *p
, int len
, bool noError
);
23 static int shift_jis_20042euc_jis_2004(const unsigned char *sjis
, unsigned char *p
, int len
, bool noError
);
27 * INTEGER, -- source encoding id
28 * INTEGER, -- destination encoding id
29 * CSTRING, -- source string (null terminated C string)
30 * CSTRING, -- destination string (null terminated C string)
31 * INTEGER, -- source string length
32 * BOOL -- if true, don't throw an error if conversion fails
35 * Returns the number of bytes successfully converted.
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS
)
42 unsigned char *src
= (unsigned char *) PG_GETARG_CSTRING(2);
43 unsigned char *dest
= (unsigned char *) PG_GETARG_CSTRING(3);
44 int len
= PG_GETARG_INT32(4);
45 bool noError
= PG_GETARG_BOOL(5);
48 CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004
, PG_SHIFT_JIS_2004
);
50 converted
= euc_jis_20042shift_jis_2004(src
, dest
, len
, noError
);
52 PG_RETURN_INT32(converted
);
56 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS
)
58 unsigned char *src
= (unsigned char *) PG_GETARG_CSTRING(2);
59 unsigned char *dest
= (unsigned char *) PG_GETARG_CSTRING(3);
60 int len
= PG_GETARG_INT32(4);
61 bool noError
= PG_GETARG_BOOL(5);
64 CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004
, PG_EUC_JIS_2004
);
66 converted
= shift_jis_20042euc_jis_2004(src
, dest
, len
, noError
);
68 PG_RETURN_INT32(converted
);
72 * EUC_JIS_2004 -> SHIFT_JIS_2004
75 euc_jis_20042shift_jis_2004(const unsigned char *euc
, unsigned char *p
, int len
, bool noError
)
77 const unsigned char *start
= euc
;
86 if (!IS_HIGHBIT_SET(c1
))
93 report_invalid_encoding(PG_EUC_JIS_2004
,
94 (const char *) euc
, len
);
102 l
= pg_encoding_verifymbchar(PG_EUC_JIS_2004
, (const char *) euc
, len
);
108 report_invalid_encoding(PG_EUC_JIS_2004
,
109 (const char *) euc
, len
);
112 if (c1
== SS2
&& l
== 2) /* JIS X 0201 kana? */
116 else if (c1
== SS3
&& l
== 3) /* JIS X 0213 plane 2? */
132 *p
++ = ((ku
+ 0x1df) >> 1) - (ku
>> 3) * 3;
135 if (ku
>= 78 && ku
<= 94)
137 *p
++ = (ku
+ 0x19b) >> 1;
143 report_invalid_encoding(PG_EUC_JIS_2004
,
144 (const char *) euc
, len
);
150 if (ten
>= 1 && ten
<= 63)
152 else if (ten
>= 64 && ten
<= 94)
158 report_invalid_encoding(PG_EUC_JIS_2004
,
159 (const char *) euc
, len
);
166 else if (l
== 2) /* JIS X 0213 plane 1? */
171 if (ku
>= 1 && ku
<= 62)
172 *p
++ = (ku
+ 0x101) >> 1;
173 else if (ku
>= 63 && ku
<= 94)
174 *p
++ = (ku
+ 0x181) >> 1;
179 report_invalid_encoding(PG_EUC_JIS_2004
,
180 (const char *) euc
, len
);
185 if (ten
>= 1 && ten
<= 63)
187 else if (ten
>= 64 && ten
<= 94)
193 report_invalid_encoding(PG_EUC_JIS_2004
,
194 (const char *) euc
, len
);
204 report_invalid_encoding(PG_EUC_JIS_2004
,
205 (const char *) euc
, len
);
217 * returns SHIFT_JIS_2004 "ten" code indicated by second byte
218 * *ku = 0: "ku" = even
219 * *ku = 1: "ku" = odd
222 get_ten(int b
, int *ku
)
226 if (b
>= 0x40 && b
<= 0x7e)
231 else if (b
>= 0x80 && b
<= 0x9e)
236 else if (b
>= 0x9f && b
<= 0xfc)
243 ten
= -1; /* error */
244 *ku
= 0; /* keep compiler quiet */
250 * SHIFT_JIS_2004 ---> EUC_JIS_2004
254 shift_jis_20042euc_jis_2004(const unsigned char *sjis
, unsigned char *p
, int len
, bool noError
)
256 const unsigned char *start
= sjis
;
268 if (!IS_HIGHBIT_SET(c1
))
275 report_invalid_encoding(PG_SHIFT_JIS_2004
,
276 (const char *) sjis
, len
);
284 l
= pg_encoding_verifymbchar(PG_SHIFT_JIS_2004
, (const char *) sjis
, len
);
286 if (l
< 0 || l
> len
)
290 report_invalid_encoding(PG_SHIFT_JIS_2004
,
291 (const char *) sjis
, len
);
294 if (c1
>= 0xa1 && c1
<= 0xdf && l
== 1)
296 /* JIS X0201 (1 byte kana) */
311 if (c1
>= 0x81 && c1
<= 0x9f) /* plane 1 1ku-62ku */
313 ku
= (c1
<< 1) - 0x100;
314 ten
= get_ten(c2
, &kubun
);
319 report_invalid_encoding(PG_SHIFT_JIS_2004
,
320 (const char *) sjis
, len
);
324 else if (c1
>= 0xe0 && c1
<= 0xef) /* plane 1 63ku-94ku */
326 ku
= (c1
<< 1) - 0x180;
327 ten
= get_ten(c2
, &kubun
);
332 report_invalid_encoding(PG_SHIFT_JIS_2004
,
333 (const char *) sjis
, len
);
337 else if (c1
>= 0xf0 && c1
<= 0xf3) /* plane 2
338 * 1,3,4,5,8,12,13,14,15 ku */
341 ten
= get_ten(c2
, &kubun
);
346 report_invalid_encoding(PG_SHIFT_JIS_2004
,
347 (const char *) sjis
, len
);
352 ku
= kubun
== 0 ? 8 : 1;
355 ku
= kubun
== 0 ? 4 : 3;
358 ku
= kubun
== 0 ? 12 : 5;
361 ku
= kubun
== 0 ? 14 : 13;
365 else if (c1
>= 0xf4 && c1
<= 0xfc) /* plane 2 78-94ku */
368 ten
= get_ten(c2
, &kubun
);
373 report_invalid_encoding(PG_SHIFT_JIS_2004
,
374 (const char *) sjis
, len
);
376 if (c1
== 0xf4 && kubun
== 1)
379 ku
= (c1
<< 1) - 0x19a - kubun
;
385 report_invalid_encoding(PG_SHIFT_JIS_2004
,
386 (const char *) sjis
, len
);