1 /* Multibyte Character Functions.
2 Copyright (C) 1998 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* Note regarding cross compilation:
23 In general, translation of multibyte characters to wide characters can
24 only work in a native compiler since the translation function (mbtowc)
25 needs to know about both the source and target character encoding. However,
26 this particular implementation for JIS, SJIS and EUCJP source characters
27 will work for any compiler with a newlib target. Other targets may also
28 work provided that their wchar_t implementation is 2 bytes and the encoding
29 leaves the source character values unchanged (except for removing the
30 state shifting markers). */
33 #ifdef MULTIBYTE_CHARS
35 #include "coretypes.h"
/* Classes of input byte distinguished by the JIS state machine.  Each
   enumerator other than JIS_C_NUM is a column index into JIS_state_table
   and JIS_action_table.  JIS_C_NUM is not a class: it is the number of
   classes and is used as the second dimension of both tables.  */
typedef enum
{
  ESCAPE,
  DOLLAR,
  BRACKET,
  AT,
  B,
  J,
  NUL,
  JIS_CHAR,
  OTHER,
  JIS_C_NUM
} JIS_CHAR_TYPE;
/* States of the JIS decoding state machine.  Each enumerator other than
   JIS_S_NUM is usable as a row index into JIS_state_table and
   JIS_action_table.  ASCII is the state the decoder starts in.  INV is the
   state entered by transitions that the action table marks as ERROR.
   JIS_S_NUM is not a state: it is the number of states and is used as the
   first dimension of both tables.  */
typedef enum
{
  ASCII,
  A_ESC,
  A_ESC_DL,
  JIS,
  JIS_1,
  JIS_2,
  J_ESC,
  J_ESC_BR,
  J2_ESC,
  J2_ESC_BR,
  INV,
  JIS_S_NUM
} JIS_STATE;
/* Actions the JIS decoder can take after classifying an input byte.  The
   action for a given (state, byte class) pair is looked up in
   JIS_action_table.  COPYA has the value 0, so it is also the action found
   in any table entry that is left without an explicit initializer.  */
typedef enum
{
  COPYA,
  COPYJ,
  COPYJ2,
  MAKE_A,
  MAKE_J,
  NOOP,
  EMPTY,
  ERROR
} JIS_ACTION;
/* State/action tables for processing JIS encoding:

   Where possible, switches to JIS are grouped with the following JIS
   characters and switches to ASCII are grouped with preceding JIS characters.
   Each switch is a three-byte escape sequence, thus the maximum returned
   length is:
     3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.  */
56 static const JIS_STATE JIS_state_table
[JIS_S_NUM
][JIS_C_NUM
] = {
57 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH*/
58 /*ASCII*/ { A_ESC
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,ASCII
,ASCII
},
59 /*A_ESC*/ { ASCII
, A_ESC_DL
,ASCII
, ASCII
, ASCII
, ASCII
, ASCII
,ASCII
,ASCII
},
60 /*A_ESC_DL*/{ ASCII
, ASCII
, ASCII
, JIS
, JIS
, ASCII
, ASCII
,ASCII
,ASCII
},
61 /*JIS*/ { J_ESC
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, INV
, JIS_1
,INV
},
62 /*JIS_1*/ { INV
, JIS_2
, JIS_2
, JIS_2
, JIS_2
, JIS_2
, INV
, JIS_2
,INV
},
63 /*JIS_2*/ { J2_ESC
,JIS
, JIS
, JIS
, JIS
, JIS
, INV
, JIS
, JIS
},
64 /*J_ESC*/ { INV
, INV
, J_ESC_BR
, INV
, INV
, INV
, INV
, INV
, INV
},
65 /*J_ESC_BR*/{ INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
66 /*J2_ESC*/ { INV
, INV
, J2_ESC_BR
,INV
, INV
, INV
, INV
, INV
, INV
},
67 /*J2_ESC_BR*/{INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
70 static const JIS_ACTION JIS_action_table
[JIS_S_NUM
][JIS_C_NUM
] = {
71 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH */
72 /*ASCII */ {NOOP
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, EMPTY
, COPYA
, COPYA
},
73 /*A_ESC */ {COPYA
, NOOP
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
, COPYA
},
74 /*A_ESC_DL */{COPYA
, COPYA
, COPYA
, MAKE_J
, MAKE_J
, COPYA
, COPYA
, COPYA
, COPYA
},
75 /*JIS */ {NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, ERROR
, NOOP
, ERROR
},
76 /*JIS_1 */ {ERROR
, NOOP
, NOOP
, NOOP
, NOOP
, NOOP
, ERROR
, NOOP
, ERROR
},
77 /*JIS_2 */ {NOOP
, COPYJ2
,COPYJ2
,COPYJ2
, COPYJ2
, COPYJ2
,ERROR
, COPYJ2
,COPYJ2
},
78 /*J_ESC */ {ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
79 /*J_ESC_BR */{ERROR
, ERROR
, ERROR
, ERROR
, NOOP
, NOOP
, ERROR
, ERROR
, ERROR
},
80 /*J2_ESC */ {ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
81 /*J2_ESC_BR*/{ERROR
, ERROR
, ERROR
, ERROR
, COPYJ
, COPYJ
, ERROR
, ERROR
, ERROR
},
/* Locale/codeset name consulted by the multibyte routines in this file.
   They compare it against "C-SJIS", "C-EUCJP" and "C-JIS"; when it is NULL
   or at most one character long they treat it as the "C" locale or an
   unknown locale.  It starts out as NULL.  */
const char *literal_codeset = NULL;
87 /* Store into *PWC (if PWC is not null) the wide character
88 corresponding to the multibyte character at the start of the
89 buffer S of size N. Return the number of bytes in the multibyte
90 character. Return -1 if the bytes do not form a valid character,
91 or 0 if S is null or points to a null byte.
93 This function behaves like the Standard C function mbtowc, except
94 it treats locale names of the form "C-..." specially. */
97 local_mbtowc (pwc
, s
, n
)
102 static JIS_STATE save_state
= ASCII
;
103 JIS_STATE curr_state
= save_state
;
104 const unsigned char *t
= (const unsigned char *) s
;
106 if (s
!= NULL
&& n
== 0)
109 if (literal_codeset
== NULL
|| strlen (literal_codeset
) <= 1)
110 /* This must be the "C" locale or unknown locale -- fall thru */
112 else if (! strcmp (literal_codeset
, "C-SJIS"))
116 /* Not state-dependent. */
130 *pwc
= (((wchar_t) *t
) << 8) + (wchar_t) (*(t
+ 1));
145 else if (! strcmp (literal_codeset
, "C-EUCJP"))
150 /* Not state-dependent. */
164 *pwc
= (((wchar_t) *t
) << 8) + (wchar_t) (*(t
+ 1));
179 else if (! strcmp (literal_codeset
, "C-JIS"))
183 const unsigned char *ptr
;
189 /* State-dependent. */
195 for (i
= 0; i
< n
; i
++)
228 action
= JIS_action_table
[curr_state
][ch
];
229 curr_state
= JIS_state_table
[curr_state
][ch
];
240 save_state
= curr_state
;
245 *pwc
= (wchar_t) *ptr
;
246 save_state
= curr_state
;
251 *pwc
= (((wchar_t) *ptr
) << 8) + (wchar_t) (*(ptr
+ 1));
253 save_state
= curr_state
;
258 *pwc
= (((wchar_t) *ptr
) << 8) + (wchar_t) (*(ptr
+ 1));
260 save_state
= curr_state
;
265 ptr
= (const unsigned char *) (t
+ i
+ 1);
274 /* More than n bytes needed. */
280 /* Not state-dependent. */
288 /* This must be the "C" locale or unknown locale. */
289 return mbtowc (pwc
, s
, n
);
293 /* Return the number of bytes in the multibyte character at the start
294 of the buffer S of size N. Return -1 if the bytes do not form a
295 valid character, or 0 if S is null or points to a null byte.
297 This function behaves like the Standard C function mblen, except
298 it treats locale names of the form "C-..." specially. */
305 return local_mbtowc (NULL
, s
, n
);
/* Return the maximum number of bytes in a multibyte character.

   This function returns the same value as the Standard C macro MB_CUR_MAX,
   except it treats locale names of the form "C-..." specially.  */
316 if (literal_codeset
== NULL
|| strlen (literal_codeset
) <= 1)
318 else if (! strcmp (literal_codeset
, "C-SJIS"))
320 else if (! strcmp (literal_codeset
, "C-EUCJP"))
322 else if (! strcmp (literal_codeset
, "C-JIS"))
323 return 8; /* 3 + 2 + 3 */
331 return 1; /* default */
334 #else /* MULTIBYTE_CHARS */
/* Declared only so that this translation unit is not empty when
   MULTIBYTE_CHARS is undefined.  */
extern int dummy; /* silence 'ANSI C forbids an empty source file' warning */
336 #endif /* MULTIBYTE_CHARS */