FSF GCC merge 02/23/03
[official-gcc.git] / libjava / gnu / java / text / SentenceBreakIterator.java
blob62b99f0fbf8bd5f95e21912ba1c5c656a7d428e9
1 /* SentenceBreakIterator.java - Default sentence BreakIterator.
2 Copyright (C) 1999, 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package gnu.java.text;
41 import java.text.BreakIterator;
42 import java.text.CharacterIterator;
44 /**
45 * @author Tom Tromey <tromey@cygnus.com>
46 * @date March 23, 1999
47 * Written using The Unicode Standard, Version 2.0.
50 public class SentenceBreakIterator extends BaseBreakIterator
52 public Object clone ()
54 return new SentenceBreakIterator (this);
57 public SentenceBreakIterator ()
59 iter = null;
62 private SentenceBreakIterator (SentenceBreakIterator other)
64 iter = (CharacterIterator) other.iter.clone();
67 public int next ()
69 int end = iter.getEndIndex();
70 if (iter.getIndex() == end)
71 return DONE;
73 while (iter.getIndex() < end)
75 char c = iter.current();
76 if (c == CharacterIterator.DONE)
77 break;
78 int type = Character.getType(c);
80 char n = iter.next();
81 if (n == CharacterIterator.DONE)
82 break;
84 // Always break after paragraph separator.
85 if (type == Character.PARAGRAPH_SEPARATOR)
86 break;
88 if (c == '!' || c == '?')
90 // Skip close punctuation.
91 while (n != CharacterIterator.DONE
92 && Character.getType(n) == Character.END_PUNCTUATION)
93 n = iter.next();
94 // Skip (java) space, line and paragraph separators.
95 while (n != CharacterIterator.DONE && Character.isWhitespace(n))
96 n = iter.next();
98 // There's always a break somewhere after `!' or `?'.
99 break;
102 if (c == '.')
104 int save = iter.getIndex();
105 // Skip close punctuation.
106 while (n != CharacterIterator.DONE
107 && Character.getType(n) == Character.END_PUNCTUATION)
108 n = iter.next();
109 // Skip (java) space, line and paragraph separators.
110 // We keep count because we need at least one for this period to
111 // represent a terminator.
112 int spcount = 0;
113 while (n != CharacterIterator.DONE && Character.isWhitespace(n))
115 n = iter.next();
116 ++spcount;
118 if (spcount > 0)
120 int save2 = iter.getIndex();
121 // Skip over open puncutation.
122 while (n != CharacterIterator.DONE
123 && Character.getType(n) == Character.START_PUNCTUATION)
124 n = iter.next();
125 // Next character must not be lower case.
126 if (n == CharacterIterator.DONE
127 || ! Character.isLowerCase(n))
129 iter.setIndex(save2);
130 break;
133 iter.setIndex(save);
137 return iter.getIndex();
140 private final int previous_internal ()
142 int start = iter.getBeginIndex();
143 if (iter.getIndex() == start)
144 return DONE;
146 while (iter.getIndex() >= start)
148 char c = iter.previous();
149 if (c == CharacterIterator.DONE)
150 break;
152 char n = iter.previous();
153 if (n == CharacterIterator.DONE)
154 break;
155 iter.next();
156 int nt = Character.getType(n);
158 if (! Character.isLowerCase(c)
159 && (nt == Character.START_PUNCTUATION
160 || Character.isWhitespace(n)))
162 int save = iter.getIndex();
163 int save_nt = nt;
164 char save_n = n;
165 // Skip open punctuation.
166 while (n != CharacterIterator.DONE
167 && Character.getType(n) == Character.START_PUNCTUATION)
168 n = iter.previous();
169 if (n == CharacterIterator.DONE)
170 break;
171 if (Character.isWhitespace(n))
173 // Must have at least one (java) space after the `.'.
174 int save2 = iter.getIndex();
175 while (n != CharacterIterator.DONE
176 && Character.isWhitespace(n))
177 n = iter.previous();
178 // Skip close punctuation.
179 while (n != CharacterIterator.DONE
180 && Character.getType(n) == Character.END_PUNCTUATION)
181 n = iter.previous();
182 if (n == CharacterIterator.DONE || n == '.')
184 // Communicate location of actual end.
185 period = iter.getIndex();
186 iter.setIndex(save2);
187 break;
190 iter.setIndex(save);
191 nt = save_nt;
192 n = save_n;
195 if (nt == Character.PARAGRAPH_SEPARATOR)
197 // Communicate location of actual end.
198 period = iter.getIndex();
199 break;
201 else if (Character.isWhitespace(n)
202 || nt == Character.END_PUNCTUATION)
204 int save = iter.getIndex();
205 // Skip (java) space, line and paragraph separators.
206 while (n != CharacterIterator.DONE
207 && Character.isWhitespace(n))
208 n = iter.previous();
209 // Skip close punctuation.
210 while (n != CharacterIterator.DONE
211 && Character.getType(n) == Character.END_PUNCTUATION)
212 n = iter.previous();
213 int here = iter.getIndex();
214 iter.setIndex(save);
215 if (n == CharacterIterator.DONE || n == '!' || n == '?')
217 // Communicate location of actual end.
218 period = here;
219 break;
222 else if (n == '!' || n == '?')
224 // Communicate location of actual end.
225 period = iter.getIndex();
226 break;
230 return iter.getIndex();
233 public int previous ()
235 // We want to skip over the first sentence end to the second one.
236 // However, at the end of the string we want the first end.
237 int here = iter.getIndex();
238 period = here;
239 int first = previous_internal ();
240 if (here == iter.getEndIndex() || first == DONE)
241 return first;
242 iter.setIndex(period);
243 return previous_internal ();
246 // This is used for communication between previous and
247 // previous_internal.
248 private int period;