libjava/classpath/java/io/StreamTokenizer.java

   1 /* StreamTokenizer.java -- parses streams of characters into tokens
   2    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003  Free Software Foundation
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38 package java.io;
  39
  40 import gnu.java.lang.CPStringBuilder;
  41
  42 /**
  43  * This class parses streams of characters into tokens.  There are a
  44  * million-zillion flags that can be set to control the parsing, as
  45  * described under the various method headings.
  46  *
  47  * @author Warren Levy (warrenl@cygnus.com)
  48  * @date October 25, 1998.
  49  */
  50 /* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
  51  * "The Java Language Specification", ISBN 0-201-63451-1
  52  * plus online API docs for JDK 1.2 beta from http://www.javasoft.com.
  53  * Status:  Believed complete and correct.
  54  */
  55
  56 public class StreamTokenizer
  57 {
  58   /** A constant indicating that the end of the stream has been read. */
  59   public static final int TT_EOF = -1;
  60
  61   /** A constant indicating that the end of the line has been read. */
  62   public static final int TT_EOL = '\n';
  63
  64   /** A constant indicating that a number token has been read. */
  65   public static final int TT_NUMBER = -2;
  66
  67   /** A constant indicating that a word token has been read. */
  68   public static final int TT_WORD = -3;
  69
  70   /** A constant indicating that no tokens have been read yet. */
  71   private static final int TT_NONE = -4;
  72
  73   /**
  74    * Contains the type of the token read resulting from a call to nextToken
  75    * The rules are as follows:
  76    * <ul>
  77    * <li>For a token consisting of a single ordinary character, this is the
  78    *     value of that character.</li>
  79    * <li>For a quoted string, this is the value of the quote character</li>
  80    * <li>For a word, this is TT_WORD</li>
  81    * <li>For a number, this is TT_NUMBER</li>
  82    * <li>For the end of the line, this is TT_EOL</li>
  83    * <li>For the end of the stream, this is TT_EOF</li>
  84    * </ul>
  85    */
  86   public int ttype = TT_NONE;
  87
  88   /** The String associated with word and string tokens. */
  89   public String sval;
  90
  91   /** The numeric value associated with number tokens. */
  92   public double nval;
  93
  94   /* Indicates whether end-of-line is recognized as a token. */
  95   private boolean eolSignificant = false;
  96
  97   /* Indicates whether word tokens are automatically made lower case. */
  98   private boolean lowerCase = false;
  99
 100   /* Indicates whether C++ style comments are recognized and skipped. */
 101   private boolean slashSlash = false;
 102
 103   /* Indicates whether C style comments are recognized and skipped. */
 104   private boolean slashStar = false;
 105
 106   /* Attribute tables of each byte from 0x00 to 0xFF. */
 107   private boolean[] whitespace = new boolean[256];
 108   private boolean[] alphabetic = new boolean[256];
 109   private boolean[] numeric = new boolean[256];
 110   private boolean[] quote = new boolean[256];
 111   private boolean[] comment = new boolean[256];
 112
 113   /* The Reader associated with this class. */
 114   private PushbackReader in;
 115
 116   /* Indicates if a token has been pushed back. */
 117   private boolean pushedBack = false;
 118
 119   /* Contains the current line number of the reader. */
 120   private int lineNumber = 1;
 121
 122   /**
 123    * This method reads bytes from an <code>InputStream</code> and tokenizes
 124    * them.  For details on how this method operates by default, see
 125    * <code>StreamTokenizer(Reader)</code>.
 126    *
 127    * @param is The <code>InputStream</code> to read from
 128    *
 129    * @deprecated Since JDK 1.1.
 130    */
 131   public StreamTokenizer(InputStream is)
 132   {
 133     this(new InputStreamReader(is));
 134   }
 135
 136   /**
 137    * This method initializes a new <code>StreamTokenizer</code> to read
 138    * characters from a <code>Reader</code> and parse them.  The char values
 139    * have their hight bits masked so that the value is treated a character
 140    * in the range of 0x0000 to 0x00FF.
 141    * <p>
 142    * This constructor sets up the parsing table to parse the stream in the
 143    * following manner:
 144    * <ul>
 145    * <li>The values 'A' through 'Z', 'a' through 'z' and 0xA0 through 0xFF
 146    *     are initialized as alphabetic</li>
 147    * <li>The values 0x00 through 0x20 are initialized as whitespace</li>
 148    * <li>The values '\'' and '"' are initialized as quote characters</li>
 149    * <li>'/' is a comment character</li>
 150    * <li>Numbers will be parsed</li>
 151    * <li>EOL is not treated as significant</li>
 152    * <li>C  and C++ (//) comments are not recognized</li>
 153    * </ul>
 154    *
 155    * @param r The <code>Reader</code> to read chars from
 156    */
 157   public StreamTokenizer(Reader r)
 158   {
 159     in = new PushbackReader(r);
 160
 161     whitespaceChars(0x00, 0x20);
 162     wordChars('A', 'Z');
 163     wordChars('a', 'z');
 164     wordChars(0xA0, 0xFF);
 165     commentChar('/');
 166     quoteChar('\'');
 167     quoteChar('"');
 168     parseNumbers();
 169   }
 170
 171   /**
 172    * This method sets the comment attribute on the specified
 173    * character.  Other attributes for the character are cleared.
 174    *
 175    * @param ch The character to set the comment attribute for, passed as an int
 176    */
 177   public void commentChar(int ch)
 178   {
 179     if (ch >= 0 && ch <= 255)
 180       {
 181         comment[ch] = true;
 182         whitespace[ch] = false;
 183         alphabetic[ch] = false;
 184         numeric[ch] = false;
 185         quote[ch] = false;
 186       }
 187   }
 188
 189   /**
 190    * This method sets a flag that indicates whether or not the end of line
 191    * sequence terminates and is a token.  The defaults to <code>false</code>
 192    *
 193    * @param flag <code>true</code> if EOF is significant, <code>false</code>
 194    *             otherwise
 195    */
 196   public void eolIsSignificant(boolean flag)
 197   {
 198     eolSignificant = flag;
 199   }
 200
 201   /**
 202    * This method returns the current line number.  Note that if the
 203    * <code>pushBack()</code> method is called, it has no effect on the
 204    * line number returned by this method.
 205    *
 206    * @return The current line number
 207    */
 208   public int lineno()
 209   {
 210     return lineNumber;
 211   }
 212
 213   /**
 214    * This method sets a flag that indicates whether or not alphabetic
 215    * tokens that are returned should be converted to lower case.
 216    *
 217    * @param flag <code>true</code> to convert to lower case,
 218    *             <code>false</code> otherwise
 219    */
 220   public void lowerCaseMode(boolean flag)
 221   {
 222     lowerCase = flag;
 223   }
 224
 225   private boolean isWhitespace(int ch)
 226   {
 227     return (ch >= 0 && ch <= 255 && whitespace[ch]);
 228   }
 229
 230   private boolean isAlphabetic(int ch)
 231   {
 232     return ((ch > 255) || (ch >= 0 && alphabetic[ch]));
 233   }
 234
 235   private boolean isNumeric(int ch)
 236   {
 237     return (ch >= 0 && ch <= 255 && numeric[ch]);
 238   }
 239
 240   private boolean isQuote(int ch)
 241   {
 242     return (ch >= 0 && ch <= 255 && quote[ch]);
 243   }
 244
 245   private boolean isComment(int ch)
 246   {
 247     return (ch >= 0 && ch <= 255 && comment[ch]);
 248   }
 249
 250   /**
 251    * This method reads the next token from the stream.  It sets the
 252    * <code>ttype</code> variable to the appropriate token type and
 253    * returns it.  It also can set <code>sval</code> or <code>nval</code>
 254    * as described below.  The parsing strategy is as follows:
 255    * <ul>
 256    * <li>Skip any whitespace characters.</li>
 257    * <li>If a numeric character is encountered, attempt to parse a numeric
 258    * value.  Leading '-' characters indicate a numeric only if followed by
 259    * another non-'-' numeric.  The value of the numeric token is terminated
 260    * by either the first non-numeric encountered, or the second occurrence of
 261    * '-' or '.'.  The token type returned is TT_NUMBER and <code>nval</code>
 262    * is set to the value parsed.</li>
 263    * <li>If an alphabetic character is parsed, all subsequent characters
 264    * are read until the first non-alphabetic or non-numeric character is
 265    * encountered.  The token type returned is TT_WORD and the value parsed
 266    * is stored in <code>sval</code>.  If lower case mode is set, the token
 267    * stored in <code>sval</code> is converted to lower case.  The end of line
 268    * sequence terminates a word only if EOL signficance has been turned on.
 269    * The start of a comment also terminates a word.  Any character with a
 270    * non-alphabetic and non-numeric attribute (such as white space, a quote,
 271    * or a commet) are treated as non-alphabetic and terminate the word.</li>
 272    * <li>If a comment character is parsed, then all remaining characters on
 273    * the current line are skipped and another token is parsed.  Any EOL or
 274    * EOF's encountered are not discarded, but rather terminate the comment.</li>
 275    * <li>If a quote character is parsed, then all characters up to the
 276    * second occurrence of the same quote character are parsed into a
 277    * <code>String</code>.  This <code>String</code> is stored as
 278    * <code>sval</code>, but is not converted to lower case, even if lower case
 279    * mode is enabled.  The token type returned is the value of the quote
 280    * character encountered.  Any escape sequences
 281    * (\b (backspace), \t (HTAB), \n (linefeed), \f (form feed), \r
 282    * (carriage return), \" (double quote), \' (single quote), \\
 283    * (backslash), \XXX (octal esacpe)) are converted to the appropriate
 284    * char values.  Invalid esacape sequences are left in untranslated.
 285    * Unicode characters like ('\ u0000') are not recognized. </li>
 286    * <li>If the C++ comment sequence "//" is encountered, and the parser
 287    * is configured to handle that sequence, then the remainder of the line
 288    * is skipped and another token is read exactly as if a character with
 289    * the comment attribute was encountered.</li>
 290    * <li>If the C comment sequence "/*" is encountered, and the parser
 291    * is configured to handle that sequence, then all characters up to and
 292    * including the comment terminator sequence are discarded and another
 293    * token is parsed.</li>
 294    * <li>If all cases above are not met, then the character is an ordinary
 295    * character that is parsed as a token by itself.  The char encountered
 296    * is returned as the token type.</li>
 297    * </ul>
 298    *
 299    * @return The token type
 300    * @exception IOException If an I/O error occurs
 301    */
 302   public int nextToken() throws IOException
 303   {
 304     if (pushedBack)
 305       {
 306         pushedBack = false;
 307         if (ttype != TT_NONE)
 308           return ttype;
 309       }
 310
 311     sval = null;
 312     int ch;
 313
 314     // Skip whitespace.  Deal with EOL along the way.
 315     while (isWhitespace(ch = in.read()))
 316       if (ch == '\n' || ch == '\r')
 317         {
 318           lineNumber++;
 319
 320           // Throw away \n if in combination with \r.
 321           if (ch == '\r' && (ch = in.read()) != '\n')
 322             {
 323               if (ch != TT_EOF)
 324                 in.unread(ch);
 325             }
 326           if (eolSignificant)
 327             return (ttype = TT_EOL);
 328         }
 329
 330     if (ch == '/')
 331       if ((ch = in.read()) == '/' && slashSlash)
 332         {
 333           while ((ch = in.read()) != '\n' && ch != '\r' && ch != TT_EOF)
 334             ;
 335
 336           if (ch != TT_EOF)
 337             in.unread(ch);
 338           return nextToken(); // Recursive, but not too deep in normal cases
 339         }
 340       else if (ch == '*' && slashStar)
 341         {
 342           while (true)
 343             {
 344               ch = in.read();
 345               if (ch == '*')
 346                 {
 347                   if ((ch = in.read()) == '/')
 348                     break;
 349                   else if (ch != TT_EOF)
 350                     in.unread(ch);
 351                 }
 352               else if (ch == '\n' || ch == '\r')
 353                 {
 354                   lineNumber++;
 355                   if (ch == '\r' && (ch = in.read()) != '\n')
 356                     {
 357                       if (ch != TT_EOF)
 358                         in.unread(ch);
 359                     }
 360                 }
 361               else if (ch == TT_EOF)
 362                 {
 363                   break;
 364                 }
 365             }
 366           return nextToken(); // Recursive, but not too deep in normal cases
 367         }
 368       else
 369         {
 370           if (ch != TT_EOF)
 371             in.unread(ch);
 372           ch = '/';
 373         }
 374
 375     if (ch == TT_EOF)
 376       ttype = TT_EOF;
 377     else if (isNumeric(ch))
 378       {
 379         boolean isNegative = false;
 380         if (ch == '-')
 381           {
 382             // Read ahead to see if this is an ordinary '-' rather than numeric.
 383             ch = in.read();
 384             if (isNumeric(ch) && ch != '-')
 385               {
 386                 isNegative = true;
 387               }
 388             else
 389               {
 390                 if (ch != TT_EOF)
 391                   in.unread(ch);
 392                 return (ttype = '-');
 393               }
 394           }
 395
 396         CPStringBuilder tokbuf = new CPStringBuilder();
 397         tokbuf.append((char) ch);
 398
 399         int decCount = 0;
 400         while (isNumeric(ch = in.read()) && ch != '-')
 401           if (ch == '.' && decCount++ > 0)
 402             break;
 403           else
 404             tokbuf.append((char) ch);
 405
 406         if (ch != TT_EOF)
 407           in.unread(ch);
 408         ttype = TT_NUMBER;
 409         try
 410           {
 411             nval = Double.valueOf(tokbuf.toString()).doubleValue();
 412           }
 413         catch (NumberFormatException _)
 414           {
 415             nval = 0.0;
 416           }
 417         if (isNegative)
 418           nval = -nval;
 419       }
 420     else if (isAlphabetic(ch))
 421       {
 422         CPStringBuilder tokbuf = new CPStringBuilder();
 423         tokbuf.append((char) ch);
 424         while (isAlphabetic(ch = in.read()) || isNumeric(ch))
 425           tokbuf.append((char) ch);
 426         if (ch != TT_EOF)
 427           in.unread(ch);
 428         ttype = TT_WORD;
 429         sval = tokbuf.toString();
 430         if (lowerCase)
 431           sval = sval.toLowerCase();
 432       }
 433     else if (isComment(ch))
 434       {
 435         while ((ch = in.read()) != '\n' && ch != '\r' && ch != TT_EOF)
 436           ;
 437
 438         if (ch != TT_EOF)
 439           in.unread(ch);
 440         return nextToken();     // Recursive, but not too deep in normal cases.
 441       }
 442     else if (isQuote(ch))
 443       {
 444         ttype = ch;
 445         CPStringBuilder tokbuf = new CPStringBuilder();
 446         while ((ch = in.read()) != ttype && ch != '\n' && ch != '\r' &&
 447                ch != TT_EOF)
 448           {
 449             if (ch == '\\')
 450               switch (ch = in.read())
 451                 {
 452                   case 'a':     ch = 0x7;
 453                     break;
 454                   case 'b':     ch = '\b';
 455                     break;
 456                   case 'f':     ch = 0xC;
 457                     break;
 458                   case 'n':     ch = '\n';
 459                     break;
 460                   case 'r':     ch = '\r';
 461                     break;
 462                   case 't':     ch = '\t';
 463                     break;
 464                   case 'v':     ch = 0xB;
 465                     break;
 466                   case '\n':    ch = '\n';
 467                     break;
 468                   case '\r':    ch = '\r';
 469                     break;
 470                   case '\"':
 471                   case '\'':
 472                   case '\\':
 473                     break;
 474                   default:
 475                     int ch1, nextch;
 476                     if ((nextch = ch1 = ch) >= '0' && ch <= '7')
 477                       {
 478                         ch -= '0';
 479                         if ((nextch = in.read()) >= '0' && nextch <= '7')
 480                           {
 481                             ch = ch * 8 + nextch - '0';
 482                             if ((nextch = in.read()) >= '0' && nextch <= '7' &&
 483                                 ch1 >= '0' && ch1 <= '3')
 484                               {
 485                                 ch = ch * 8 + nextch - '0';
 486                                 nextch = in.read();
 487                               }
 488                           }
 489                       }
 490
 491                     if (nextch != TT_EOF)
 492                       in.unread(nextch);
 493                 }
 494
 495             tokbuf.append((char) ch);
 496           }
 497
 498         // Throw away matching quote char.
 499         if (ch != ttype && ch != TT_EOF)
 500           in.unread(ch);
 501
 502         sval = tokbuf.toString();
 503       }
 504     else
 505       {
 506         ttype = ch;
 507       }
 508
 509     return ttype;
 510   }
 511
 512   private void resetChar(int ch)
 513   {
 514     whitespace[ch] = alphabetic[ch] = numeric[ch] = quote[ch] = comment[ch] =
 515       false;
 516   }
 517
 518   /**
 519    * This method makes the specified character an ordinary character.  This
 520    * means that none of the attributes (whitespace, alphabetic, numeric,
 521    * quote, or comment) will be set on this character.  This character will
 522    * parse as its own token.
 523    *
 524    * @param ch The character to make ordinary, passed as an int
 525    */
 526   public void ordinaryChar(int ch)
 527   {
 528     if (ch >= 0 && ch <= 255)
 529       resetChar(ch);
 530   }
 531
 532   /**
 533    * This method makes all the characters in the specified range, range
 534    * terminators included, ordinary.  This means the none of the attributes
 535    * (whitespace, alphabetic, numeric, quote, or comment) will be set on
 536    * any of the characters in the range.  This makes each character in this
 537    * range parse as its own token.
 538    *
 539    * @param low The low end of the range of values to set the whitespace
 540    * attribute for
 541    * @param hi The high end of the range of values to set the whitespace
 542    * attribute for
 543    */
 544   public void ordinaryChars(int low, int hi)
 545   {
 546     if (low < 0)
 547       low = 0;
 548     if (hi > 255)
 549       hi = 255;
 550     for (int i = low; i <= hi; i++)
 551       resetChar(i);
 552   }
 553
 554   /**
 555    * This method sets the numeric attribute on the characters '0' - '9' and
 556    * the characters '.' and '-'.
 557    * When this method is used, the result of giving other attributes
 558    * (whitespace, quote, or comment) to the numeric characters may
 559    * vary depending on the implementation. For example, if
 560    * parseNumbers() and then whitespaceChars('1', '1') are called,
 561    * this implementation reads "121" as 2, while some other implementation
 562    * will read it as 21.
 563    */
 564   public void parseNumbers()
 565   {
 566     for (int i = 0; i <= 9; i++)
 567       numeric['0' + i] = true;
 568
 569     numeric['.'] = true;
 570     numeric['-'] = true;
 571   }
 572
 573   /**
 574    * Puts the current token back into the StreamTokenizer so
 575    * <code>nextToken</code> will return the same value on the next call.
 576    * May cause the lineno method to return an incorrect value
 577    * if lineno is called before the next call to nextToken.
 578    */
 579   public void pushBack()
 580   {
 581     pushedBack = true;
 582   }
 583
 584   /**
 585    * This method sets the quote attribute on the specified character.
 586    * Other attributes for the character are cleared.
 587    *
 588    * @param ch The character to set the quote attribute for, passed as an int.
 589    */
 590   public void quoteChar(int ch)
 591   {
 592     if (ch >= 0 && ch <= 255)
 593       {
 594         quote[ch] = true;
 595         comment[ch] = false;
 596         whitespace[ch] = false;
 597         alphabetic[ch] = false;
 598         numeric[ch] = false;
 599       }
 600   }
 601
 602   /**
 603    * This method removes all attributes (whitespace, alphabetic, numeric,
 604    * quote, and comment) from all characters.  It is equivalent to calling
 605    * <code>ordinaryChars(0x00, 0xFF)</code>.
 606    *
 607    * @see #ordinaryChars(int, int)
 608    */
 609   public void resetSyntax()
 610   {
 611     ordinaryChars(0x00, 0xFF);
 612   }
 613
 614   /**
 615    * This method sets a flag that indicates whether or not "C++" language style
 616    * comments ("//" comments through EOL ) are handled by the parser.
 617    * If this is <code>true</code> commented out sequences are skipped and
 618    * ignored by the parser.  This defaults to <code>false</code>.
 619    *
 620    * @param flag <code>true</code> to recognized and handle "C++" style
 621    *             comments, <code>false</code> otherwise
 622    */
 623   public void slashSlashComments(boolean flag)
 624   {
 625     slashSlash = flag;
 626   }
 627
 628   /**
 629    * This method sets a flag that indicates whether or not "C" language style
 630    * comments (with nesting not allowed) are handled by the parser.
 631    * If this is <code>true</code> commented out sequences are skipped and
 632    * ignored by the parser.  This defaults to <code>false</code>.
 633    *
 634    * @param flag <code>true</code> to recognized and handle "C" style comments,
 635    *             <code>false</code> otherwise
 636    */
 637   public void slashStarComments(boolean flag)
 638   {
 639     slashStar = flag;
 640   }
 641
 642   /**
 643    * This method returns the current token value as a <code>String</code> in
 644    * the form "Token[x], line n", where 'n' is the current line numbers and
 645    * 'x' is determined as follows.
 646    * <p>
 647    * <ul>
 648    * <li>If no token has been read, then 'x' is "NOTHING" and 'n' is 0</li>
 649    * <li>If <code>ttype</code> is TT_EOF, then 'x' is "EOF"</li>
 650    * <li>If <code>ttype</code> is TT_EOL, then 'x' is "EOL"</li>
 651    * <li>If <code>ttype</code> is TT_WORD, then 'x' is <code>sval</code></li>
 652    * <li>If <code>ttype</code> is TT_NUMBER, then 'x' is "n=strnval" where
 653    * 'strnval' is <code>String.valueOf(nval)</code>.</li>
 654    * <li>If <code>ttype</code> is a quote character, then 'x' is
 655    * <code>sval</code></li>
 656    * <li>For all other cases, 'x' is <code>ttype</code></li>
 657    * </ul>
 658    */
 659   public String toString()
 660   {
 661     String tempstr;
 662     if (ttype == TT_EOF)
 663       tempstr = "EOF";
 664     else if (ttype == TT_EOL)
 665       tempstr = "EOL";
 666     else if (ttype == TT_WORD)
 667       tempstr = sval;
 668     else if (ttype == TT_NUMBER)
 669       tempstr = "n=" + nval;
 670     else if (ttype == TT_NONE)
 671       tempstr = "NOTHING";
 672     else // must be an ordinary char.
 673       tempstr = "\'" + (char) ttype + "\'";
 674
 675     return "Token[" + tempstr + "], line " + lineno();
 676   }
 677
 678   /**
 679    * This method sets the whitespace attribute for all characters in the
 680    * specified range, range terminators included.
 681    *
 682    * @param low The low end of the range of values to set the whitespace
 683    * attribute for
 684    * @param hi The high end of the range of values to set the whitespace
 685    * attribute for
 686    */
 687   public void whitespaceChars(int low, int hi)
 688   {
 689     if (low < 0)
 690       low = 0;
 691     if (hi > 255)
 692       hi = 255;
 693     for (int i = low; i <= hi; i++)
 694       {
 695         resetChar(i);
 696         whitespace[i] = true;
 697       }
 698   }
 699
 700   /**
 701    * This method sets the alphabetic attribute for all characters in the
 702    * specified range, range terminators included.
 703    *
 704    * @param low The low end of the range of values to set the alphabetic
 705    * attribute for
 706    * @param hi The high end of the range of values to set the alphabetic
 707    * attribute for
 708    */
 709   public void wordChars(int low, int hi)
 710   {
 711     if (low < 0)
 712       low = 0;
 713     if (hi > 255)
 714       hi = 255;
 715     for (int i = low; i <= hi; i++)
 716       alphabetic[i] = true;
 717   }
 718 }