[official-gcc.git] / gcc / mbchar.c
blobd54a49749ce58a31173fcf2f6b883a050c65b0b7
1 /* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* These functions are used to manipulate multibyte characters. */
23 /* Note regarding cross compilation:
25 In general translation of multibyte characters to wide characters can
26 only work in a native compiler since the translation function (mbtowc)
27 needs to know about both the source and target character encoding. However,
28 this particular implementation for JIS, SJIS and EUCJP source characters
29 will work for any compiler with a newlib target. Other targets may also
30 work provided that their wchar_t implementation is 2 bytes and the encoding
31 leaves the source character values unchanged (except for removing the
32 state shifting markers). */
34 #ifdef MULTIBYTE_CHARS
35 #include "config.h"
36 #include "system.h"
37 #include "gansidecl.h"
38 #include "mbchar.h"
39 #include <locale.h>
/* Classification of a single input byte while scanning JIS-encoded text.
   The enumerators are used as the column index of JIS_state_table and
   JIS_action_table, so their order must match the column order of those
   tables.  JIS_C_NUM is not a character class; it is the number of
   classes (the number of table columns).  */
typedef enum
{
  ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM
} JIS_CHAR_TYPE;
/* States of the JIS decoder.  The enumerators are used as the row index
   of JIS_state_table and JIS_action_table, so their order must match the
   row order of those tables.

   ASCII      single-byte mode, nothing pending.
   A_ESC      single-byte mode, ESC seen.
   A_ESC_DL   single-byte mode, ESC '$' seen.
   JIS        two-byte mode, waiting for the first byte of a character.
   JIS_1      two-byte mode, first byte seen.
   JIS_2      two-byte mode, both bytes seen; looking at the next byte.
   J_ESC      two-byte mode, ESC seen with no character pending.
   J_ESC_BR   as J_ESC, followed by '('.
   J2_ESC     two-byte mode, ESC seen right after a complete character.
   J2_ESC_BR  as J2_ESC, followed by '('.
   INV        invalid input was seen.
   JIS_S_NUM  not a state; the number of states (table rows).  */
typedef enum
{
  ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
  J2_ESC, J2_ESC_BR, INV, JIS_S_NUM
} JIS_STATE;
/* Actions the JIS decoder performs after classifying one input byte.

   COPYA   store the single byte at the start of the current segment and
	   return.
   COPYJ   store the two-byte character at the start of the current
	   segment and return, consuming everything scanned so far.
   COPYJ2  store the two-byte character at the start of the current
	   segment and return, leaving the byte just examined unconsumed.
   MAKE_A,
   MAKE_J  begin a new segment right after the byte just examined.
   NOOP    keep scanning.
   EMPTY   store a null wide character and return.
   ERROR   report invalid input.  */
typedef enum
{
  COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR
} JIS_ACTION;
/*****************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the following JIS
 * characters and switches to ASCII are grouped with the preceding JIS
 * characters.
 * Thus, maximum returned length is:
 *   3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
 *****************************************************************************/
64 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
65 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
66 /*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
67 /*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
68 /*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
69 /*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
70 /*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
71 /*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
72 /*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
73 /*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
74 /*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
75 /*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
78 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
79 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
80 /*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
81 /*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
82 /*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
83 /*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
84 /*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
85 /*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
86 /*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
87 /*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
88 /*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
89 /*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
/* Name of the character set used for multibyte characters in literals.
   The functions in this file recognize "C-SJIS", "C-EUCJP" and "C-JIS";
   NULL, a string of at most one character, or any other name selects the
   default (host mbtowc, or one byte per character when cross compiling).
   It is not assigned anywhere in this file.  */
char *literal_codeset = NULL;
95 int
96 local_mbtowc (pwc, s, n)
97 wchar_t *pwc;
98 const char *s;
99 size_t n;
101 static JIS_STATE save_state = ASCII;
102 JIS_STATE curr_state = save_state;
103 unsigned char *t = (unsigned char *)s;
105 if (s != NULL && n == 0)
106 return -1;
108 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
110 /* This must be the "C" locale or unknown locale -- fall thru */
112 else if (! strcmp (literal_codeset, "C-SJIS"))
114 int char1;
115 if (s == NULL)
116 return 0; /* not state-dependent */
117 char1 = *t;
118 if (ISSJIS1 (char1))
120 int char2 = t[1];
121 if (n <= 1)
122 return -1;
123 if (ISSJIS2 (char2))
125 if (pwc != NULL)
126 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
127 return 2;
129 return -1;
131 if (pwc != NULL)
132 *pwc = (wchar_t)*t;
133 if (*t == '\0')
134 return 0;
135 return 1;
137 else if (! strcmp (literal_codeset, "C-EUCJP"))
139 int char1;
140 if (s == NULL)
141 return 0; /* not state-dependent */
142 char1 = *t;
143 if (ISEUCJP (char1))
145 int char2 = t[1];
146 if (n <= 1)
147 return -1;
148 if (ISEUCJP (char2))
150 if (pwc != NULL)
151 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
152 return 2;
154 return -1;
156 if (pwc != NULL)
157 *pwc = (wchar_t)*t;
158 if (*t == '\0')
159 return 0;
160 return 1;
162 else if (! strcmp (literal_codeset, "C-JIS"))
164 JIS_ACTION action;
165 JIS_CHAR_TYPE ch;
166 unsigned char *ptr;
167 int i, curr_ch;
169 if (s == NULL)
171 save_state = ASCII;
172 return 1; /* state-dependent */
175 ptr = t;
177 for (i = 0; i < n; ++i)
179 curr_ch = t[i];
180 switch (curr_ch)
182 case JIS_ESC_CHAR:
183 ch = ESCAPE;
184 break;
185 case '$':
186 ch = DOLLAR;
187 break;
188 case '@':
189 ch = AT;
190 break;
191 case '(':
192 ch = BRACKET;
193 break;
194 case 'B':
195 ch = B;
196 break;
197 case 'J':
198 ch = J;
199 break;
200 case '\0':
201 ch = NUL;
202 break;
203 default:
204 if (ISJIS (curr_ch))
205 ch = JIS_CHAR;
206 else
207 ch = OTHER;
210 action = JIS_action_table[curr_state][ch];
211 curr_state = JIS_state_table[curr_state][ch];
213 switch (action)
215 case NOOP:
216 break;
217 case EMPTY:
218 if (pwc != NULL)
219 *pwc = (wchar_t)0;
220 save_state = curr_state;
221 return i;
222 case COPYA:
223 if (pwc != NULL)
224 *pwc = (wchar_t)*ptr;
225 save_state = curr_state;
226 return (i + 1);
227 case COPYJ:
228 if (pwc != NULL)
229 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
230 save_state = curr_state;
231 return (i + 1);
232 case COPYJ2:
233 if (pwc != NULL)
234 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
235 save_state = curr_state;
236 return (ptr - t) + 2;
237 case MAKE_A:
238 case MAKE_J:
239 ptr = (char *)(t + i + 1);
240 break;
241 case ERROR:
242 default:
243 return -1;
247 return -1; /* n < bytes needed */
250 #ifdef CROSS_COMPILE
251 if (s == NULL)
252 return 0; /* not state-dependent */
253 if (pwc != NULL)
254 *pwc = *s;
255 return 1;
256 #else
257 /* This must be the "C" locale or unknown locale. */
258 return mbtowc (pwc, s, n);
259 #endif
/* Return the length in bytes of the multibyte character starting at S,
   looking at no more than N bytes.  This is local_mbtowc with the
   converted character discarded, so the return values (including the
   S == NULL query and the -1 error result) are those of
   local_mbtowc.  */
int
local_mblen (s, n)
     const char *s;
     size_t n;
{
  return local_mbtowc (NULL, s, n);
}
271 local_mb_cur_max ()
273 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
275 else if (! strcmp (literal_codeset, "C-SJIS"))
276 return 2;
277 else if (! strcmp (literal_codeset, "C-EUCJP"))
278 return 2;
279 else if (! strcmp (literal_codeset, "C-JIS"))
280 return 8; /* 3 + 2 + 3 */
282 #ifdef CROSS_COMPILE
283 return 1;
284 #else
285 return MB_CUR_MAX;
286 #endif
288 #endif /* MULTIBYTE_CHARS */