1 // natIconv.cc -- Java side of iconv() reader.
3 /* Copyright (C) 2000, 2001, 2003 Free Software Foundation
5 This file is part of libgcj.
7 This software is copyrighted work licensed under the terms of the
8 Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
11 /* Author: Tom Tromey <tromey@redhat.com>. */
18 #include <gnu/gcj/convert/Input_iconv.h>
19 #include <gnu/gcj/convert/Output_iconv.h>
20 #include <java/io/CharConversionException.h>
21 #include <java/io/UnsupportedEncodingException.h>
// Forwards a conversion request to an iconv()-style function.  The
// type T of the callee's input-buffer parameter comes from the
// function pointer's signature, and INBUF is cast to T before the
// call is made.
// NOTE(review): presumably this hides the difference between
// platforms that declare that parameter as `const char **' and those
// that use `char **'; the template header is outside this excerpt,
// so confirm against the full file.
30 iconv_adapter (size_t (*iconv_f
) (iconv_t
, T
, size_t *, char **, size_t *),
31 iconv_t handle
, char **inbuf
, size_t *inavail
,
32 char **outbuf
, size_t *outavail
)
// Every argument is passed through unchanged apart from the cast of
// INBUF; the callee's result is returned as-is.
34 return (*iconv_f
) (handle
, (T
) inbuf
, inavail
, outbuf
, outavail
);
// Opens an iconv descriptor that converts from ENCODING to "UCS-2"
// and stores it in `handle'.  Throws UnsupportedEncodingException
// when iconv_open() fails, and unconditionally on the #else branch
// (a build without HAVE_ICONV).
40 gnu::gcj::convert::Input_iconv::init (jstring encoding
)
// Size of the encoding name as reported by _Jv_GetStringUTFLength.
43 jsize len
= _Jv_GetStringUTFLength (encoding
);
// Copy the encoding name into `buffer' so that it can be passed to
// iconv_open() below.
// NOTE(review): the declaration of `buffer' and any terminator store
// are outside this excerpt.
45 _Jv_GetStringUTFRegion (encoding
, 0, encoding
->length(), buffer
);
// iconv_open() takes the target encoding first and the source
// encoding second, so this converts ENCODING => UCS-2.
48 iconv_t h
= iconv_open ("UCS-2", buffer
);
// (iconv_t) -1 is iconv_open()'s failure value.
49 if (h
== (iconv_t
) -1)
50 throw new java::io::UnsupportedEncodingException (encoding
);
// Store the descriptor in `handle' as an opaque RawData pointer.
53 handle
= reinterpret_cast<gnu::gcj::RawData
*> (h
);
54 #else /* HAVE_ICONV */
55 // If no iconv, just throw an exception.
56 throw new java::io::UnsupportedEncodingException (encoding
);
57 #endif /* HAVE_ICONV */
// Releases the iconv descriptor held in `handle' by passing it to
// iconv_close().
// NOTE(review): any guard around the call (for example a null check)
// is outside this excerpt.
61 gnu::gcj::convert::Input_iconv::finalize (void)
// `handle' is cast back to iconv_t before being closed.
66 iconv_close ((iconv_t
) handle
);
69 #endif /* HAVE_ICONV */
// Converts bytes from `inbuffer', starting at `inpos', into jchars
// written to OUTBUFFER starting at index OUTPOS.  At most COUNT
// jchars of output space are offered to iconv.
//
// Returns the number of jchars produced, and advances `inpos' by the
// number of input bytes consumed.  Throws CharConversionException
// when errno is neither EINVAL nor E2BIG at the check below.
// NOTE(review): the value returned on the #else branch (no
// HAVE_ICONV) is outside this excerpt.
73 gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer
,
74 jint outpos
, jint count
)
// Raw element storage of the source byte array and target char array.
77 jbyte
*bytes
= elements (inbuffer
);
78 jchar
*out
= elements (outbuffer
);
// Input is measured in bytes; remember the starting amount so the
// number of bytes consumed can be computed afterwards.
79 size_t inavail
= inlength
- inpos
;
80 size_t old_in
= inavail
;
// Output space is also measured in bytes, hence the scaling of COUNT
// by sizeof (jchar).
81 size_t outavail
= count
* sizeof (jchar
);
82 size_t old_out
= outavail
;
// Byte pointers to the first unread input byte and the first output
// slot.
84 char *inbuf
= (char *) &bytes
[inpos
];
85 char *outbuf
= (char *) &out
[outpos
];
// Run the conversion.
// NOTE(review): the remaining arguments of this call and the test of
// `r' that guards the errno check are outside this excerpt.
87 size_t r
= iconv_adapter (iconv
, (iconv_t
) handle
,
93 // If we see EINVAL then there is an incomplete sequence at the
94 // end of the input buffer. If we see E2BIG then we ran out of
95 // space in the output buffer. However, in both these cases
96 // some conversion might have taken place. So we fall through
97 // to the normal case.
98 if (errno
!= EINVAL
&& errno
!= E2BIG
)
99 throw new java::io::CharConversionException ();
// Number of jchars that iconv actually wrote.
104 size_t max
= (old_out
- outavail
) / sizeof (jchar
);
// Walk the freshly written jchars and compute each one with its two
// bytes exchanged.
// NOTE(review): the condition that enables this loop and the
// statement that stores `c' are outside this excerpt.
105 for (size_t i
= 0; i
< max
; ++i
)
108 jchar c
= (((out
[outpos
+ i
] & 0xff) << 8)
109 | ((out
[outpos
+ i
] >> 8) & 0xff));
// Account for the input bytes consumed by this call.
114 inpos
+= old_in
- inavail
;
// Bytes produced divided by the jchar size gives the jchar count.
115 return (old_out
- outavail
) / sizeof (jchar
);
116 #else /* HAVE_ICONV */
118 #endif /* HAVE_ICONV */
// Resets the iconv descriptor to its initial state and then chains
// to BytesToUnicode::done().  Does nothing when built without
// HAVE_ICONV.
122 gnu::gcj::convert::Input_iconv::done ()
// NOTE(review): the scratch buffer this comment refers to, and the
// declarations of `p' and `avail' used below, are outside this
// excerpt.
125 // 50 bytes should be enough for any reset sequence.
129 // Calling iconv() with a NULL INBUF pointer will cause iconv() to
130 // switch to its initial state. We don't care about the output that
131 // might be generated in that situation.
132 iconv_adapter (iconv
, (iconv_t
) handle
, NULL
, NULL
, &p
, &avail
);
133 BytesToUnicode::done ();
134 #else /* HAVE_ICONV */
135 // If no iconv, do nothing
136 #endif /* HAVE_ICONV */
// Opens an iconv descriptor that converts from "UCS-2" to ENCODING
// and stores it in `handle'.  Throws UnsupportedEncodingException
// when iconv_open() fails, and unconditionally on the #else branch
// (a build without HAVE_ICONV).
140 gnu::gcj::convert::Output_iconv::init (jstring encoding
)
// Size of the encoding name as reported by _Jv_GetStringUTFLength.
143 jsize len
= _Jv_GetStringUTFLength (encoding
);
// Stack buffer with one byte more than LEN.  Its size is a run-time
// value, which is a compiler extension rather than standard C++.
// NOTE(review): the extra byte is presumably for a terminating NUL;
// the store that writes it is outside this excerpt.
144 char buffer
[len
+ 1];
// Copy the encoding name into `buffer' for iconv_open().
145 _Jv_GetStringUTFRegion (encoding
, 0, encoding
->length(), buffer
);
// iconv_open() takes the target encoding first and the source
// encoding second, so this converts UCS-2 => ENCODING.
148 iconv_t h
= iconv_open (buffer
, "UCS-2");
// (iconv_t) -1 is iconv_open()'s failure value.
149 if (h
== (iconv_t
) -1)
150 throw new java::io::UnsupportedEncodingException (encoding
);
// A successfully opened descriptor is asserted to be non-NULL before
// it is stored.
152 JvAssert (h
!= NULL
);
// Store the descriptor in `handle' as an opaque RawData pointer.
153 handle
= reinterpret_cast<gnu::gcj::RawData
*> (h
);
154 #else /* HAVE_ICONV */
155 // If no iconv, just throw an exception.
156 throw new java::io::UnsupportedEncodingException (encoding
);
157 #endif /* HAVE_ICONV */
// Releases the iconv descriptor held in `handle' by passing it to
// iconv_close().
// NOTE(review): any guard around the call (for example a null check)
// is outside this excerpt.
161 gnu::gcj::convert::Output_iconv::finalize (void)
// `handle' is cast back to iconv_t before being closed.
166 iconv_close ((iconv_t
) handle
);
169 #endif /* HAVE_ICONV */
// Converts INLENGTH jchars of INBUFFER, starting at index INPOS,
// into bytes stored in `buf' starting at offset `count'.
//
// Returns the number of jchars consumed, and advances `count' by the
// number of bytes produced.
// NOTE(review): the loop structure around the iconv call, the bodies
// of the error branches, and the value returned on the #else branch
// (no HAVE_ICONV) are outside this excerpt.
173 gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer
,
174 jint inpos
, jint inlength
)
// Raw element storage of the source char array and target byte array.
177 jchar
*chars
= elements (inbuffer
);
178 jbyte
*out
= elements (buf
);
// Scratch copy of the input; stays NULL unless the byte-swapping
// path below allocates it.
179 jchar
*temp_buffer
= NULL
;
// Input is measured in bytes, hence the scaling by sizeof (jchar).
// The starting amount is kept so the consumption can be computed.
181 size_t inavail
= inlength
* sizeof (jchar
);
182 size_t old_in
= inavail
;
// Free space left in `buf' after the `count' bytes already used.
184 size_t outavail
= buf
->length
- count
;
185 size_t old_out
= outavail
;
// Byte pointers to the first input jchar and the first free output
// byte.
187 char *inbuf
= (char *) &chars
[inpos
];
188 char *outbuf
= (char *) &out
[count
];
// NOTE(review): the condition that selects this byte-swapping path
// and the statement that stores `c' into temp_buffer are outside
// this excerpt.
192 // Ugly performance penalty -- don't use losing systems!
193 temp_buffer
= (jchar
*) _Jv_Malloc (inlength
* sizeof (jchar
));
// Compute each input jchar with its two bytes exchanged.
194 for (int i
= 0; i
< inlength
; ++i
)
197 jchar c
= (((chars
[inpos
+ i
] & 0xff) << 8)
198 | ((chars
[inpos
+ i
] >> 8) & 0xff));
// From here on iconv reads from the scratch copy instead of the
// caller's array.
201 inbuf
= (char *) temp_buffer
;
// Snapshot of the input byte count; compared with `inavail' below to
// detect an attempt that consumed no input.
204 size_t loop_old_in
= old_in
;
// Run the conversion.
// NOTE(review): the remaining arguments of this call are outside
// this excerpt.
207 size_t r
= iconv_adapter (iconv
, (iconv_t
) handle
,
// (size_t) -1 is iconv()'s error return; errno selects the case.
210 if (r
== (size_t) -1)
214 // Incomplete byte sequence at the end of the input
215 // buffer. This shouldn't be able to happen here.
218 else if (errno
== E2BIG
)
220 // Output buffer is too small.
223 else if (errno
== EILSEQ
|| inavail
== loop_old_in
)
225 // Untranslatable sequence. Since glibc 2.1.3 doesn't
226 // properly set errno, we also assume that this is what
227 // is happening if no conversions took place. (This can
228 // be a bogus assumption if in fact the output buffer is
229 // too small.) We skip the first character and try
// Release the scratch copy if the byte-swapping path allocated one.
242 if (temp_buffer
!= NULL
)
243 _Jv_Free (temp_buffer
);
// Account for the bytes appended to `buf' by this call.
245 count
+= old_out
- outavail
;
// Input bytes consumed divided by the jchar size gives the number of
// jchars consumed.
246 return (old_in
- inavail
) / sizeof (jchar
);
247 #else /* HAVE_ICONV */
249 #endif /* HAVE_ICONV */
// Probes the platform's iconv() by running a UTF-8 => UCS-2
// conversion of \ufeff and comparing the value read back with
// 0xfeff.  `result' starts out false and is only changed when the
// probe converts completely.
// NOTE(review): the statement that returns `result', and the
// declarations of `c', `r', `inp', `inc', `outp' and `outc', are
// outside this excerpt.
253 gnu::gcj::convert::IOConverter::iconv_init (void)
255 // Some versions of iconv() always return their UCS-2 results in
256 // big-endian order, and they also require UCS-2 inputs to be in
257 // big-endian order. For instance, glibc 2.1.3 does this. If the
258 // UTF-8=>UCS-2 iconv converter has this feature, then we assume
259 // that all UCS-2 converters do. (This might not be the best
260 // heuristic, but it is all we've got.)
261 jboolean result
= false;
// Open a throwaway descriptor for the probe; if that fails the
// probe is skipped and `result' keeps its initial value.
263 iconv_t handle
= iconv_open ("UCS-2", "UTF-8");
264 if (handle
!= (iconv_t
) -1)
271 // This is the UTF-8 encoding of \ufeff.
// Convert the test sequence.
281 r
= iconv_adapter (iconv
, handle
, &inp
, &inc
, &outp
, &outc
);
282 // Conversion must be complete for us to use the result.
283 if (r
!= (size_t) -1 && inc
== 0 && outc
== 0)
// `result' becomes true when the value in `c' is not 0xfeff.
// NOTE(review): presumably `c' is the conversion output and a
// mismatch means iconv wrote the opposite byte order -- confirm.
284 result
= (c
!= 0xfeff);
286 // Release iconv handle.
287 iconv_close (handle
);
289 #endif /* HAVE_ICONV */
// Resets the iconv descriptor to its initial state and then chains
// to UnicodeToBytes::done().  Does nothing when built without
// HAVE_ICONV.
294 gnu::gcj::convert::Output_iconv::done ()
// NOTE(review): the scratch buffer this comment refers to, and the
// declarations of `p' and `avail' used below, are outside this
// excerpt.
297 // 50 bytes should be enough for any reset sequence.
301 // Calling iconv() with a NULL INBUF pointer will cause iconv() to
302 // switch to its initial state. We don't care about the output that
303 // might be generated in that situation.
304 iconv_adapter (iconv
, (iconv_t
) handle
, NULL
, NULL
, &p
, &avail
);
305 UnicodeToBytes::done ();
306 #else /* HAVE_ICONV */
307 // If no iconv, do nothing
308 #endif /* HAVE_ICONV */