1 /* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* These functions are used to manipulate multibyte characters. */
23 /* Note regarding cross compilation:
25 In general translation of multibyte characters to wide characters can
26 only work in a native compiler since the translation function (mbtowc)
27 needs to know about both the source and target character encoding. However,
28 this particular implementation for JIS, SJIS and EUCJP source characters
29 will work for any compiler with a newlib target. Other targets may also
30 work provided that their wchar_t implementation is 2 bytes and the encoding
31 leaves the source character values unchanged (except for removing the
32 state shifting markers). */
34 #ifdef MULTIBYTE_CHARS
42 ESCAPE
, DOLLAR
, BRACKET
, AT
, B
, J
, NUL
, JIS_CHAR
, OTHER
, JIS_C_NUM
47 ASCII
, A_ESC
, A_ESC_DL
, JIS
, JIS_1
, JIS_2
, J_ESC
, J_ESC_BR
,
48 J2_ESC
, J2_ESC_BR
, INV
, JIS_S_NUM
53 COPYA
, COPYJ
, COPYJ2
, MAKE_A
, MAKE_J
, NOOP
, EMPTY
, ERROR
56 /*****************************************************************************
57 * state/action tables for processing JIS encoding
58 * Where possible, switches to JIS are grouped with the following JIS characters
59 * and switches to ASCII are grouped with preceding JIS characters.
60 * Thus, maximum returned length is:
61 * 3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
62 *****************************************************************************/
/* Next-state table for the JIS decoder.  The code in this file reads it
   as JIS_state_table[curr_state][ch]: the first index is the current
   state (one row per state, labelled at the start of each row) and the
   second index is the class of the input byte (columns in the order
   shown in the header row).  The entry is the state to move to.  INV
   appears only as a target; every entry that leads to it has ERROR in
   the same position of JIS_action_table.
   NOTE(review): ten rows are written out and none is labelled INV.  If
   JIS_S_NUM also counts INV, that last row is left to implicit zero
   initialization -- confirm this is intended.  */
63 static JIS_STATE JIS_state_table
[JIS_S_NUM
][JIS_C_NUM
] = {
64 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
65 /*ASCII*/ { A_ESC
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,ASCII
,ASCII
},
66 /*A_ESC*/ { ASCII
, A_ESC_DL
,ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,ASCII
,ASCII
},
67 /*A_ESC_DL*/{ ASCII
, ASCII
, ASCII
, JIS
, JIS
, ASCII
, ASCII
,ASCII
,ASCII
},
68 /*JIS*/ { J_ESC
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, INV
, JIS_1
,INV
},
69 /*JIS_1*/ { INV
, JIS_2
, JIS_2
, JIS_2
, JIS_2
, JIS_2
, INV
, JIS_2
,INV
},
70 /*JIS_2*/ { J2_ESC
,JIS
, JIS
, JIS
, JIS
, JIS
, INV
, JIS
, JIS
},
71 /*J_ESC*/ { INV
, INV
, J_ESC_BR
, INV
, INV
, INV
, INV
, INV
, INV
},
72 /*J_ESC_BR*/{ INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
73 /*J2_ESC*/ { INV
, INV
, J2_ESC_BR
,INV
, INV
, INV
, INV
, INV
, INV
},
74 /*J2_ESC_BR*/{INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
/* Action table for the JIS decoder, laid out exactly like
   JIS_state_table: JIS_action_table[curr_state][ch] names the action to
   take for the current state (row) and input byte class (column).  In
   the visible lookup code the action is fetched before curr_state is
   advanced, so each row refers to the state before the transition.
   NOTE(review): ten rows are written out and none is labelled INV.  If
   JIS_S_NUM also counts INV, that last row is left to implicit zero
   initialization -- confirm this is intended.  */
77 static JIS_ACTION JIS_action_table
[JIS_S_NUM
][JIS_C_NUM
] = {
78 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
79 /*ASCII */ {NOOP
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, EMPTY
, COPYA
, COPYA
},
80 /*A_ESC */ {COPYA
, NOOP
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
},
81 /*A_ESC_DL */{COPYA
, COPYA
, COPYA
, MAKE_J
, MAKE_J
, COPYA
, COPYA
, COPYA
, COPYA
},
82 /*JIS */ {NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, ERROR
, NOOP
, ERROR
},
83 /*JIS_1 */ {ERROR
, NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, ERROR
, NOOP
, ERROR
},
84 /*JIS_2 */ {NOOP
, COPYJ2
,COPYJ2
,COPYJ2
, COPYJ2
, COPYJ2
,ERROR
, COPYJ2
,COPYJ2
},
85 /*J_ESC */ {ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
86 /*J_ESC_BR */{ERROR
, ERROR
, ERROR
, ERROR
, NOOP
, NOOP
, ERROR
, ERROR
, ERROR
},
87 /*J2_ESC */ {ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
88 /*J2_ESC_BR*/{ERROR
, ERROR
, ERROR
, ERROR
, COPYJ
, COPYJ
, ERROR
, ERROR
, ERROR
},
/* Codeset name consulted by the functions in this file.  It starts out
   NULL.  The visible code compares it against "C-SJIS", "C-EUCJP" and
   "C-JIS"; a NULL pointer or a name of at most one character is treated
   as the "C" locale or an unknown locale.  */
92 char *literal_codeset
= NULL
;
95 local_mbtowc (pwc
, s
, n
)
100 static JIS_STATE save_state
= ASCII
;
101 JIS_STATE curr_state
= save_state
;
102 unsigned char *t
= (unsigned char *)s
;
104 if (s
!= NULL
&& n
== 0)
107 if (literal_codeset
== NULL
|| strlen (literal_codeset
) <= 1)
109 /* This must be the "C" locale or unknown locale -- fall thru */
111 else if (! strcmp (literal_codeset
, "C-SJIS"))
115 return 0; /* not state-dependent */
125 *pwc
= (((wchar_t)*t
) << 8) + (wchar_t)(*(t
+1));
136 else if (! strcmp (literal_codeset
, "C-EUCJP"))
140 return 0; /* not state-dependent */
150 *pwc
= (((wchar_t)*t
) << 8) + (wchar_t)(*(t
+1));
161 else if (! strcmp (literal_codeset
, "C-JIS"))
171 return 1; /* state-dependent */
176 for (i
= 0; i
< n
; ++i
)
209 action
= JIS_action_table
[curr_state
][ch
];
210 curr_state
= JIS_state_table
[curr_state
][ch
];
219 save_state
= curr_state
;
223 *pwc
= (wchar_t)*ptr
;
224 save_state
= curr_state
;
228 *pwc
= (((wchar_t)*ptr
) << 8) + (wchar_t)(*(ptr
+1));
229 save_state
= curr_state
;
233 *pwc
= (((wchar_t)*ptr
) << 8) + (wchar_t)(*(ptr
+1));
234 save_state
= curr_state
;
235 return (ptr
- t
) + 2;
238 ptr
= (char *)(t
+ i
+ 1);
246 return -1; /* n < bytes needed */
251 return 0; /* not state-dependent */
256 /* This must be the "C" locale or unknown locale. */
257 return mbtowc (pwc
, s
, n
);
266 return local_mbtowc (NULL
, s
, n
);
272 if (literal_codeset
== NULL
|| strlen (literal_codeset
) <= 1)
274 else if (! strcmp (literal_codeset
, "C-SJIS"))
276 else if (! strcmp (literal_codeset
, "C-EUCJP"))
278 else if (! strcmp (literal_codeset
, "C-JIS"))
279 return 8; /* 3 + 2 + 3 */
287 #endif /* MULTIBYTE_CHARS */