Imported GNU Classpath 0.90
[official-gcc.git] / libjava / classpath / gnu / xml / stream / UnicodeReader.java
blob9350cb2e0b633948f7db001cda46908921f2bfdc
1 /* UnicodeReader.java --
2 Copyright (C) 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu.xml.stream;
40 import java.io.IOException;
41 import java.io.Reader;
43 /**
44 * A reader that converts UTF-16 characters to Unicode code points.
46 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
48 public class UnicodeReader
51 final Reader in;
53 UnicodeReader(Reader in)
55 this.in = in;
58 public void mark(int limit)
59 throws IOException
61 in.mark(limit * 2);
64 public void reset()
65 throws IOException
67 in.reset();
70 public int read()
71 throws IOException
73 int ret = in.read();
74 if (ret == -1)
75 return ret;
76 if (ret >= 0xd800 && ret < 0xdc00)
78 // Unicode surrogate?
79 int low = in.read();
80 if (low >= 0xdc00 && low < 0xe000)
81 ret = Character.toCodePoint((char) ret, (char) low);
82 else
83 throw new IOException("unpaired surrogate: U+" +
84 Integer.toHexString(ret));
86 else if (ret >= 0xdc00 && ret < 0xe000)
87 throw new IOException("unpaired surrogate: U+" +
88 Integer.toHexString(ret));
89 return ret;
92 public int read(int[] buf, int off, int len)
93 throws IOException
95 if (len == 0)
96 return 0;
97 char[] b2 = new char[len];
98 int ret = in.read(b2, 0, len);
99 if (ret <= 0)
100 return ret;
101 int l = ret - 1;
102 int i = 0, j = off;
103 for (; i < l; i++)
105 char c = b2[i];
106 if (c >= 0xd800 && c < 0xdc00)
108 // Unicode surrogate?
109 char d = b2[i + 1];
110 if (d >= 0xdc00 && d < 0xe000)
112 buf[j++] = Character.toCodePoint(c, d);
113 i++;
114 continue;
116 else
117 throw new IOException("unpaired surrogate: U+" +
118 Integer.toHexString(c));
120 else if (c >= 0xdc00 && c < 0xe000)
121 throw new IOException("unpaired surrogate: U+" +
122 Integer.toHexString(c));
123 buf[j++] = (int) c;
125 if (i == l)
127 // last char
128 char c = b2[l];
129 if (c >= 0xd800 && c < 0xdc00)
131 int low = in.read();
132 if (low >= 0xdc00 && low < 0xe000)
134 buf[j++] = Character.toCodePoint(c, (char) low);
135 return j;
137 else
138 throw new IOException("unpaired surrogate: U+" +
139 Integer.toHexString(c));
141 else if (c >= 0xdc00 && c < 0xe000)
142 throw new IOException("unpaired surrogate: U+" +
143 Integer.toHexString(c));
144 buf[j++] = (int) c;
146 return j;
149 public void close()
150 throws IOException
152 in.close();
156 * Returns the specified UTF-16 char array as an array of Unicode code
157 * points.
159 public static int[] toCodePointArray(String text)
160 throws IOException
162 char[] b2 = text.toCharArray();
163 int[] buf = new int[b2.length];
164 if (b2.length > 0)
166 int l = b2.length - 1;
167 int i = 0, j = 0;
168 for (; i < l; i++)
170 char c = b2[i];
171 if (c >= 0xd800 && c < 0xdc00)
173 // Unicode surrogate?
174 char d = b2[i + 1];
175 if (d >= 0xdc00 && d < 0xe000)
177 buf[j++] = Character.toCodePoint(c, d);
178 i++;
179 continue;
181 else
182 throw new IOException("unpaired surrogate: U+" +
183 Integer.toHexString(c));
185 else if (c >= 0xdc00 && c < 0xe000)
186 throw new IOException("unpaired surrogate: U+" +
187 Integer.toHexString(c));
188 buf[j++] = (int) c;
190 if (i == l)
192 // last char
193 buf[j++] = (int) b2[l];
194 if (j < buf.length)
196 int[] buf2 = new int[j];
197 System.arraycopy(buf, 0, buf2, 0, j);
198 buf = buf2;
202 return buf;