1 /* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option) any later version.
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* These functions are used to manipulate multibyte characters. */
23 /* Note regarding cross compilation:
25 In general translation of multibyte characters to wide characters can
26 only work in a native compiler since the translation function (mbtowc)
27 needs to know about both the source and target character encoding. However,
28 this particular implementation for JIS, SJIS and EUCJP source characters
29 will work for any compiler with a newlib target. Other targets may also
30 work provided that their wchar_t implementation is 2 bytes and the encoding
31 leaves the source character values unchanged (except for removing the
32 state shifting markers). */
34 #ifdef MULTIBYTE_CHARS
37 #include "gansidecl.h"
43 ESCAPE
, DOLLAR
, BRACKET
, AT
, B
, J
, NUL
, JIS_CHAR
, OTHER
, JIS_C_NUM
48 ASCII
, A_ESC
, A_ESC_DL
, JIS
, JIS_1
, JIS_2
, J_ESC
, J_ESC_BR
,
49 J2_ESC
, J2_ESC_BR
, INV
, JIS_S_NUM
54 COPYA
, COPYJ
, COPYJ2
, MAKE_A
, MAKE_J
, NOOP
, EMPTY
, ERROR
57 /*****************************************************************************
58 * state/action tables for processing JIS encoding
59 * Where possible, switches to JIS are grouped with the following JIS characters
60 * and switches to ASCII are grouped with preceding JIS characters.
61 * Thus, maximum returned length is:
62 * 3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
63 *****************************************************************************/
64 static JIS_STATE JIS_state_table
[JIS_S_NUM
][JIS_C_NUM
] = {
65 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
66 /*ASCII*/ { A_ESC
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,ASCII
,ASCII
},
67 /*A_ESC*/ { ASCII
, A_ESC_DL
,ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,ASCII
,ASCII
},
68 /*A_ESC_DL*/{ ASCII
, ASCII
, ASCII
, JIS
, JIS
, ASCII
, ASCII
,ASCII
,ASCII
},
69 /*JIS*/ { J_ESC
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, INV
, JIS_1
,INV
},
70 /*JIS_1*/ { INV
, JIS_2
, JIS_2
, JIS_2
, JIS_2
, JIS_2
, INV
, JIS_2
,INV
},
71 /*JIS_2*/ { J2_ESC
,JIS
, JIS
, JIS
, JIS
, JIS
, INV
, JIS
, JIS
},
72 /*J_ESC*/ { INV
, INV
, J_ESC_BR
, INV
, INV
, INV
, INV
, INV
, INV
},
73 /*J_ESC_BR*/{ INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
74 /*J2_ESC*/ { INV
, INV
, J2_ESC_BR
,INV
, INV
, INV
, INV
, INV
, INV
},
75 /*J2_ESC_BR*/{INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
78 static JIS_ACTION JIS_action_table
[JIS_S_NUM
][JIS_C_NUM
] = {
79 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
80 /*ASCII */ {NOOP
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, EMPTY
, COPYA
, COPYA
},
81 /*A_ESC */ {COPYA
, NOOP
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
},
82 /*A_ESC_DL */{COPYA
, COPYA
, COPYA
, MAKE_J
, MAKE_J
, COPYA
, COPYA
, COPYA
, COPYA
},
83 /*JIS */ {NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, ERROR
, NOOP
, ERROR
},
84 /*JIS_1 */ {ERROR
, NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, ERROR
, NOOP
, ERROR
},
85 /*JIS_2 */ {NOOP
, COPYJ2
,COPYJ2
,COPYJ2
, COPYJ2
, COPYJ2
,ERROR
, COPYJ2
,COPYJ2
},
86 /*J_ESC */ {ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
87 /*J_ESC_BR */{ERROR
, ERROR
, ERROR
, ERROR
, NOOP
, NOOP
, ERROR
, ERROR
, ERROR
},
88 /*J2_ESC */ {ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
89 /*J2_ESC_BR*/{ERROR
, ERROR
, ERROR
, ERROR
, COPYJ
, COPYJ
, ERROR
, ERROR
, ERROR
},
/* Name of the codeset used when converting multibyte characters.
   Starts out as NULL.  Code in this file treats NULL, or a string of
   length <= 1, as the "C" locale or an unknown locale, and otherwise
   compares the value against "C-SJIS", "C-EUCJP" and "C-JIS".  */
93 char *literal_codeset
= NULL
;
96 local_mbtowc (pwc
, s
, n
)
101 static JIS_STATE save_state
= ASCII
;
102 JIS_STATE curr_state
= save_state
;
103 unsigned char *t
= (unsigned char *)s
;
105 if (s
!= NULL
&& n
== 0)
108 if (literal_codeset
== NULL
|| strlen (literal_codeset
) <= 1)
110 /* This must be the "C" locale or unknown locale -- fall thru */
112 else if (! strcmp (literal_codeset
, "C-SJIS"))
116 return 0; /* not state-dependent */
126 *pwc
= (((wchar_t)*t
) << 8) + (wchar_t)(*(t
+1));
137 else if (! strcmp (literal_codeset
, "C-EUCJP"))
141 return 0; /* not state-dependent */
151 *pwc
= (((wchar_t)*t
) << 8) + (wchar_t)(*(t
+1));
162 else if (! strcmp (literal_codeset
, "C-JIS"))
172 return 1; /* state-dependent */
177 for (i
= 0; i
< n
; ++i
)
210 action
= JIS_action_table
[curr_state
][ch
];
211 curr_state
= JIS_state_table
[curr_state
][ch
];
220 save_state
= curr_state
;
224 *pwc
= (wchar_t)*ptr
;
225 save_state
= curr_state
;
229 *pwc
= (((wchar_t)*ptr
) << 8) + (wchar_t)(*(ptr
+1));
230 save_state
= curr_state
;
234 *pwc
= (((wchar_t)*ptr
) << 8) + (wchar_t)(*(ptr
+1));
235 save_state
= curr_state
;
236 return (ptr
- t
) + 2;
239 ptr
= (char *)(t
+ i
+ 1);
247 return -1; /* n < bytes needed */
252 return 0; /* not state-dependent */
257 /* This must be the "C" locale or unknown locale. */
258 return mbtowc (pwc
, s
, n
);
267 return local_mbtowc (NULL
, s
, n
);
273 if (literal_codeset
== NULL
|| strlen (literal_codeset
) <= 1)
275 else if (! strcmp (literal_codeset
, "C-SJIS"))
277 else if (! strcmp (literal_codeset
, "C-EUCJP"))
279 else if (! strcmp (literal_codeset
, "C-JIS"))
280 return 8; /* 3 + 2 + 3 */
288 #endif /* MULTIBYTE_CHARS */