001 /* BreakIterator.java -- Breaks text into elements
002 Copyright (C) 1998, 1999, 2001, 2004, 2005, 2007
003 Free Software Foundation, Inc.
004
005 This file is part of GNU Classpath.
006
007 GNU Classpath is free software; you can redistribute it and/or modify
008 it under the terms of the GNU General Public License as published by
009 the Free Software Foundation; either version 2, or (at your option)
010 any later version.
011
012 GNU Classpath is distributed in the hope that it will be useful, but
013 WITHOUT ANY WARRANTY; without even the implied warranty of
014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 General Public License for more details.
016
017 You should have received a copy of the GNU General Public License
018 along with GNU Classpath; see the file COPYING. If not, write to the
019 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
020 02110-1301 USA.
021
022 Linking this library statically or dynamically with other modules is
023 making a combined work based on this library. Thus, the terms and
024 conditions of the GNU General Public License cover the whole
025 combination.
026
027 As a special exception, the copyright holders of this library give you
028 permission to link this library with independent modules to produce an
029 executable, regardless of the license terms of these independent
030 modules, and to copy and distribute the resulting executable under
031 terms of your choice, provided that you also meet, for each linked
032 independent module, the terms and conditions of the license of that
033 module. An independent module is a module which is not derived from
034 or based on this library. If you modify this library, you may extend
035 this exception to your version of the library, but you are not
036 obligated to do so. If you do not wish to do so, delete this
037 exception statement from your version. */
038
039
040 package java.text;
041
042 import gnu.java.locale.LocaleHelper;
043
044 import gnu.java.text.CharacterBreakIterator;
045 import gnu.java.text.LineBreakIterator;
046 import gnu.java.text.SentenceBreakIterator;
047 import gnu.java.text.WordBreakIterator;
048
049 import java.text.spi.BreakIteratorProvider;
050
051 import java.util.Locale;
052 import java.util.MissingResourceException;
053 import java.util.ResourceBundle;
054 import java.util.ServiceLoader;
055
056 /**
057 * This class iterates over text elements such as words, lines, sentences,
058 * and characters. It can only iterate over one of these text elements at
059 * a time. An instance of this class configured for the desired iteration
060 * type is created by calling one of the static factory methods, not
061 * by directly calling a constructor.
062 *
063 * The standard iterators created by the factory methods in this
064 * class will be valid upon creation. That is, their methods will
065 * not cause exceptions if called before you call setText().
066 *
067 * @author Tom Tromey (tromey@cygnus.com)
068 * @author Aaron M. Renn (arenn@urbanophile.com)
069 * @date March 19, 1999
070 */
071 /* Written using "Java Class Libraries", 2nd edition, plus online
072 * API docs for JDK 1.2 beta from http://www.javasoft.com.
073 * Status: Believed complete and correct to 1.1.
074 */
075 public abstract class BreakIterator implements Cloneable
076 {
077 /**
078 * This value is returned by the <code>next()</code> and
079 * <code>previous</code> in order to indicate that the end of the
080 * text has been reached.
081 */
082 // The value was discovered by writing a test program.
083 public static final int DONE = -1;
084
085 /**
086 * This method initializes a new instance of <code>BreakIterator</code>.
087 * This protected constructor is available to subclasses as a default
088 * no-arg superclass constructor.
089 */
090 protected BreakIterator ()
091 {
092 }
093
094 /**
095 * Create a clone of this object.
096 */
097 public Object clone ()
098 {
099 try
100 {
101 return super.clone();
102 }
103 catch (CloneNotSupportedException e)
104 {
105 return null;
106 }
107 }
108
109 /**
110 * This method returns the index of the current text element boundary.
111 *
112 * @return The current text boundary.
113 */
114 public abstract int current ();
115
116 /**
117 * This method returns the first text element boundary in the text being
118 * iterated over.
119 *
120 * @return The first text boundary.
121 */
122 public abstract int first ();
123
124 /**
125 * This methdod returns the offset of the text element boundary following
126 * the specified offset.
127 *
128 * @param pos The text index from which to find the next text boundary.
129 *
130 * @return The next text boundary following the specified index.
131 */
132 public abstract int following (int pos);
133
134 /**
135 * This method returns a list of locales for which instances of
136 * <code>BreakIterator</code> are available.
137 *
138 * @return A list of available locales
139 */
140 public static synchronized Locale[] getAvailableLocales ()
141 {
142 Locale[] l = new Locale[1];
143 l[0] = Locale.US;
144 return l;
145 }
146
147 private static BreakIterator getInstance (String type, Locale loc)
148 {
149 String className;
150 try
151 {
152 ResourceBundle res
153 = ResourceBundle.getBundle("gnu.java.locale.LocaleInformation",
154 loc, ClassLoader.getSystemClassLoader());
155 className = res.getString(type);
156 }
157 catch (MissingResourceException x)
158 {
159 return null;
160 }
161 try
162 {
163 Class k = Class.forName(className);
164 return (BreakIterator) k.newInstance();
165 }
166 catch (ClassNotFoundException x1)
167 {
168 return null;
169 }
170 catch (InstantiationException x2)
171 {
172 return null;
173 }
174 catch (IllegalAccessException x3)
175 {
176 return null;
177 }
178 }
179
180 /**
181 * This method returns an instance of <code>BreakIterator</code> that will
182 * iterate over characters as defined in the default locale.
183 *
184 * @return A <code>BreakIterator</code> instance for the default locale.
185 */
186 public static BreakIterator getCharacterInstance ()
187 {
188 return getCharacterInstance (Locale.getDefault());
189 }
190
191 /**
192 * This method returns an instance of <code>BreakIterator</code> that will
193 * iterate over characters as defined in the specified locale.
194 *
195 * @param locale The desired locale.
196 *
197 * @return A <code>BreakIterator</code> instance for the specified locale.
198 */
199 public static BreakIterator getCharacterInstance (Locale locale)
200 {
201 BreakIterator r = getInstance("CharacterIterator", locale);
202 if (r != null)
203 return r;
204 for (BreakIteratorProvider p :
205 ServiceLoader.load(BreakIteratorProvider.class))
206 {
207 for (Locale loc : p.getAvailableLocales())
208 {
209 if (loc.equals(locale))
210 {
211 BreakIterator bi = p.getCharacterInstance(locale);
212 if (bi != null)
213 return bi;
214 break;
215 }
216 }
217 }
218 if (locale.equals(Locale.ROOT))
219 return new CharacterBreakIterator();
220 return getCharacterInstance(LocaleHelper.getFallbackLocale(locale));
221 }
222
223 /**
224 * This method returns an instance of <code>BreakIterator</code> that will
225 * iterate over line breaks as defined in the default locale.
226 *
227 * @return A <code>BreakIterator</code> instance for the default locale.
228 */
229 public static BreakIterator getLineInstance ()
230 {
231 return getLineInstance (Locale.getDefault());
232 }
233
234 /**
235 * This method returns an instance of <code>BreakIterator</code> that will
236 * iterate over line breaks as defined in the specified locale.
237 *
238 * @param locale The desired locale.
239 *
240 * @return A <code>BreakIterator</code> instance for the default locale.
241 */
242 public static BreakIterator getLineInstance (Locale locale)
243 {
244 BreakIterator r = getInstance ("LineIterator", locale);
245 if (r != null)
246 return r;
247 for (BreakIteratorProvider p :
248 ServiceLoader.load(BreakIteratorProvider.class))
249 {
250 for (Locale loc : p.getAvailableLocales())
251 {
252 if (loc.equals(locale))
253 {
254 BreakIterator bi = p.getLineInstance(locale);
255 if (bi != null)
256 return bi;
257 break;
258 }
259 }
260 }
261 if (locale.equals(Locale.ROOT))
262 return new LineBreakIterator();
263 return getLineInstance(LocaleHelper.getFallbackLocale(locale));
264 }
265
266 /**
267 * This method returns an instance of <code>BreakIterator</code> that will
268 * iterate over sentences as defined in the default locale.
269 *
270 * @return A <code>BreakIterator</code> instance for the default locale.
271 */
272 public static BreakIterator getSentenceInstance ()
273 {
274 return getSentenceInstance (Locale.getDefault());
275 }
276
277 /**
278 * This method returns an instance of <code>BreakIterator</code> that will
279 * iterate over sentences as defined in the specified locale.
280 *
281 * @param locale The desired locale.
282 *
283 * @return A <code>BreakIterator</code> instance for the default locale.
284 */
285 public static BreakIterator getSentenceInstance (Locale locale)
286 {
287 BreakIterator r = getInstance ("SentenceIterator", locale);
288 if (r != null)
289 return r;
290 for (BreakIteratorProvider p :
291 ServiceLoader.load(BreakIteratorProvider.class))
292 {
293 for (Locale loc : p.getAvailableLocales())
294 {
295 if (loc.equals(locale))
296 {
297 BreakIterator bi = p.getSentenceInstance(locale);
298 if (bi != null)
299 return bi;
300 break;
301 }
302 }
303 }
304 if (locale.equals(Locale.ROOT))
305 return new SentenceBreakIterator();
306 return getSentenceInstance(LocaleHelper.getFallbackLocale(locale));
307 }
308
309 /**
310 * This method returns the text this object is iterating over as a
311 * <code>CharacterIterator</code>.
312 *
313 * @return The text being iterated over.
314 */
315 public abstract CharacterIterator getText ();
316
317 /**
318 * This method returns an instance of <code>BreakIterator</code> that will
319 * iterate over words as defined in the default locale.
320 *
321 * @return A <code>BreakIterator</code> instance for the default locale.
322 */
323 public static BreakIterator getWordInstance ()
324 {
325 return getWordInstance (Locale.getDefault());
326 }
327
328 /**
329 * This method returns an instance of <code>BreakIterator</code> that will
330 * iterate over words as defined in the specified locale.
331 *
332 * @param locale The desired locale.
333 *
334 * @return A <code>BreakIterator</code> instance for the default locale.
335 */
336 public static BreakIterator getWordInstance (Locale locale)
337 {
338 BreakIterator r = getInstance ("WordIterator", locale);
339 if (r != null)
340 return r;
341 for (BreakIteratorProvider p :
342 ServiceLoader.load(BreakIteratorProvider.class))
343 {
344 for (Locale loc : p.getAvailableLocales())
345 {
346 if (loc.equals(locale))
347 {
348 BreakIterator bi = p.getWordInstance(locale);
349 if (bi != null)
350 return bi;
351 break;
352 }
353 }
354 }
355 if (locale.equals(Locale.ROOT))
356 return new WordBreakIterator();
357 return getWordInstance(LocaleHelper.getFallbackLocale(locale));
358 }
359
360 /**
361 * This method tests whether or not the specified position is a text
362 * element boundary.
363 *
364 * @param pos The text position to test.
365 *
366 * @return <code>true</code> if the position is a boundary,
367 * <code>false</code> otherwise.
368 */
369 public boolean isBoundary (int pos)
370 {
371 if (pos == 0)
372 return true;
373 return following (pos - 1) == pos;
374 }
375
376 /**
377 * This method returns the last text element boundary in the text being
378 * iterated over.
379 *
380 * @return The last text boundary.
381 */
382 public abstract int last ();
383
384 /**
385 * This method returns the text element boundary following the current
386 * text position.
387 *
388 * @return The next text boundary.
389 */
390 public abstract int next ();
391
392 /**
393 * This method returns the n'th text element boundary following the current
394 * text position.
395 *
396 * @param n The number of text element boundaries to skip.
397 *
398 * @return The next text boundary.
399 */
400 public abstract int next (int n);
401
402 /**
403 * This methdod returns the offset of the text element boundary preceding
404 * the specified offset.
405 *
406 * @param pos The text index from which to find the preceding text boundary.
407 *
408 * @returns The next text boundary preceding the specified index.
409 */
410 public int preceding (int pos)
411 {
412 if (following (pos) == DONE)
413 last ();
414 while (previous () >= pos)
415 ;
416 return current ();
417 }
418
419 /**
420 * This method returns the text element boundary preceding the current
421 * text position.
422 *
423 * @return The previous text boundary.
424 */
425 public abstract int previous ();
426
427 /**
428 * This method sets the text string to iterate over.
429 *
430 * @param newText The <code>String</code> to iterate over.
431 */
432 public void setText (String newText)
433 {
434 setText (new StringCharacterIterator (newText));
435 }
436
437 /**
438 * This method sets the text to iterate over from the specified
439 * <code>CharacterIterator</code>.
440 *
441 * @param newText The desired <code>CharacterIterator</code>.
442 */
443 public abstract void setText (CharacterIterator newText);
444 }