--with-gnu-ld uses different x- file under aix 4.1
[official-gcc.git] / gcc / mbchar.c
blob89604361cf28e0a7deae485f4afa9b63938d05ff
1 /* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* These functions are used to manipulate multibyte characters. */
23 /* Note regarding cross compilation:
25 In general translation of multibyte characters to wide characters can
26 only work in a native compiler since the translation function (mbtowc)
27 needs to know about both the source and target character encoding. However,
28 this particular implementation for JIS, SJIS and EUCJP source characters
29 will work for any compiler with a newlib target. Other targets may also
30 work provided that their wchar_t implementation is 2 bytes and the encoding
31 leaves the source character values unchanged (except for removing the
32 state shifting markers). */
34 #ifdef MULTIBYTE_CHARS
35 #include "config.h"
36 #include "system.h"
37 #include "mbchar.h"
38 #include <locale.h>
/* Classification of one input byte while decoding "C-JIS" text.  The
   values are used as column indices into JIS_state_table and
   JIS_action_table, so their order must match the table columns.
   JIS_C_NUM is not a class; it is the number of classes.  */
typedef enum
{
  ESCAPE,    /* JIS_ESC_CHAR */
  DOLLAR,    /* '$' */
  BRACKET,   /* '(' */
  AT,        /* '@' */
  B,         /* 'B' */
  J,         /* 'J' */
  NUL,       /* '\0' */
  JIS_CHAR,  /* any other byte for which ISJIS is true */
  OTHER,     /* everything else */
  JIS_C_NUM
} JIS_CHAR_TYPE;
/* States of the "C-JIS" decoder.  The values are used as row indices into
   JIS_state_table and JIS_action_table, so their order must match the
   table rows.  JIS_S_NUM is not a state; it is the number of states.  */
typedef enum
{
  ASCII,      /* ASCII mode, nothing pending */
  A_ESC,      /* ASCII mode, ESC seen */
  A_ESC_DL,   /* ASCII mode, ESC '$' seen */
  JIS,        /* JIS mode, waiting for the first byte of a character */
  JIS_1,      /* JIS mode, first byte of a character seen */
  JIS_2,      /* JIS mode, both bytes of a character seen */
  J_ESC,      /* JIS mode, ESC seen with no character pending */
  J_ESC_BR,   /* JIS mode, ESC '(' seen with no character pending */
  J2_ESC,     /* JIS mode, ESC seen after a complete character */
  J2_ESC_BR,  /* JIS mode, ESC '(' seen after a complete character */
  INV,        /* invalid input */
  JIS_S_NUM
} JIS_STATE;
/* What local_mbtowc does after classifying one byte of "C-JIS" input.  */
typedef enum
{
  COPYA,   /* store the single byte at the character start; consume
              everything scanned so far */
  COPYJ,   /* store the two-byte character at the character start; consume
              everything scanned so far (including a trailing shift) */
  COPYJ2,  /* store the two-byte character at the character start; consume
              up to and including those two bytes only */
  MAKE_A,  /* shift sequence complete: character starts at the next byte */
  MAKE_J,  /* shift sequence complete: character starts at the next byte */
  NOOP,    /* keep scanning */
  EMPTY,   /* store a null wide character */
  ERROR    /* invalid sequence */
} JIS_ACTION;
56 /*****************************************************************************
57 * state/action tables for processing JIS encoding
58 * Where possible, switches to JIS are grouped with proceding JIS characters
59 * and switches to ASCII are grouped with preceding JIS characters.
60 * Thus, maximum returned length is:
61 * 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
62 *****************************************************************************/
63 static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
64 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
65 /*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
66 /*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
67 /*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
68 /*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
69 /*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
70 /*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
71 /*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
72 /*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
73 /*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
74 /*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
77 static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
78 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
79 /*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
80 /*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
81 /*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
82 /*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
83 /*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
84 /*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
85 /*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
86 /*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
87 /*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
88 /*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
/* Name of the codeset used by local_mbtowc and local_mb_cur_max.  The
   names recognized in this file are "C-SJIS", "C-EUCJP" and "C-JIS"; NULL,
   a string of length <= 1, or any other name selects the default handling.
   Nothing in this file assigns it; presumably it is set by the caller.  */
char *literal_codeset = NULL;
94 int
95 local_mbtowc (pwc, s, n)
96 wchar_t *pwc;
97 const char *s;
98 size_t n;
100 static JIS_STATE save_state = ASCII;
101 JIS_STATE curr_state = save_state;
102 unsigned char *t = (unsigned char *)s;
104 if (s != NULL && n == 0)
105 return -1;
107 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
109 /* This must be the "C" locale or unknown locale -- fall thru */
111 else if (! strcmp (literal_codeset, "C-SJIS"))
113 int char1;
114 if (s == NULL)
115 return 0; /* not state-dependent */
116 char1 = *t;
117 if (ISSJIS1 (char1))
119 int char2 = t[1];
120 if (n <= 1)
121 return -1;
122 if (ISSJIS2 (char2))
124 if (pwc != NULL)
125 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
126 return 2;
128 return -1;
130 if (pwc != NULL)
131 *pwc = (wchar_t)*t;
132 if (*t == '\0')
133 return 0;
134 return 1;
136 else if (! strcmp (literal_codeset, "C-EUCJP"))
138 int char1;
139 if (s == NULL)
140 return 0; /* not state-dependent */
141 char1 = *t;
142 if (ISEUCJP (char1))
144 int char2 = t[1];
145 if (n <= 1)
146 return -1;
147 if (ISEUCJP (char2))
149 if (pwc != NULL)
150 *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
151 return 2;
153 return -1;
155 if (pwc != NULL)
156 *pwc = (wchar_t)*t;
157 if (*t == '\0')
158 return 0;
159 return 1;
161 else if (! strcmp (literal_codeset, "C-JIS"))
163 JIS_ACTION action;
164 JIS_CHAR_TYPE ch;
165 unsigned char *ptr;
166 int i, curr_ch;
168 if (s == NULL)
170 save_state = ASCII;
171 return 1; /* state-dependent */
174 ptr = t;
176 for (i = 0; i < n; ++i)
178 curr_ch = t[i];
179 switch (curr_ch)
181 case JIS_ESC_CHAR:
182 ch = ESCAPE;
183 break;
184 case '$':
185 ch = DOLLAR;
186 break;
187 case '@':
188 ch = AT;
189 break;
190 case '(':
191 ch = BRACKET;
192 break;
193 case 'B':
194 ch = B;
195 break;
196 case 'J':
197 ch = J;
198 break;
199 case '\0':
200 ch = NUL;
201 break;
202 default:
203 if (ISJIS (curr_ch))
204 ch = JIS_CHAR;
205 else
206 ch = OTHER;
209 action = JIS_action_table[curr_state][ch];
210 curr_state = JIS_state_table[curr_state][ch];
212 switch (action)
214 case NOOP:
215 break;
216 case EMPTY:
217 if (pwc != NULL)
218 *pwc = (wchar_t)0;
219 save_state = curr_state;
220 return i;
221 case COPYA:
222 if (pwc != NULL)
223 *pwc = (wchar_t)*ptr;
224 save_state = curr_state;
225 return (i + 1);
226 case COPYJ:
227 if (pwc != NULL)
228 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
229 save_state = curr_state;
230 return (i + 1);
231 case COPYJ2:
232 if (pwc != NULL)
233 *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
234 save_state = curr_state;
235 return (ptr - t) + 2;
236 case MAKE_A:
237 case MAKE_J:
238 ptr = (char *)(t + i + 1);
239 break;
240 case ERROR:
241 default:
242 return -1;
246 return -1; /* n < bytes needed */
249 #ifdef CROSS_COMPILE
250 if (s == NULL)
251 return 0; /* not state-dependent */
252 if (pwc != NULL)
253 *pwc = *s;
254 return 1;
255 #else
256 /* This must be the "C" locale or unknown locale. */
257 return mbtowc (pwc, s, n);
258 #endif
/* Return the length in bytes of the multibyte character starting at S,
   examining at most N bytes.  This is local_mbtowc with the converted
   character discarded, so the return values (including 0 and -1, and the
   behaviour for a NULL S) are exactly those of local_mbtowc.  */
int
local_mblen (s, n)
     const char *s;
     size_t n;
{
  return local_mbtowc (NULL, s, n);
}
270 local_mb_cur_max ()
272 if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
274 else if (! strcmp (literal_codeset, "C-SJIS"))
275 return 2;
276 else if (! strcmp (literal_codeset, "C-EUCJP"))
277 return 2;
278 else if (! strcmp (literal_codeset, "C-JIS"))
279 return 8; /* 3 + 2 + 3 */
281 #ifdef CROSS_COMPILE
282 return 1;
283 #else
284 return MB_CUR_MAX;
285 #endif
287 #endif /* MULTIBYTE_CHARS */