Reset branch to trunk.
[official-gcc.git] / trunk / libjava / classpath / gnu / xml / xpath / XPathTokenizer.java
blobbf4516afae7aca50356b4a3f2ea0979e69d01acf
1 /* XPathTokenizer.java --
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu.xml.xpath;
40 import gnu.java.lang.CPStringBuilder;
42 import java.io.BufferedReader;
43 import java.io.IOException;
44 import java.io.Reader;
45 import java.io.StringReader;
46 import java.util.Map;
47 import java.util.TreeMap;
49 /*import antlr.Token;
50 import antlr.TokenStream;
51 import antlr.TokenStreamException;
52 import antlr.TokenStreamIOException;*/
/**
 * XPath 1.0 expression tokenizer.
 *
 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 */
59 public class XPathTokenizer
60 implements XPathParser.yyInput
61 //implements TokenStream
64 static class XPathToken
65 //extends Token
68 int type;
69 String val;
71 XPathToken (int type)
73 this (type, null);
76 XPathToken (int type, String val)
78 //super (type);
79 this.type = type;
80 this.val = val;
83 public String getText ()
85 return val;
88 public String toString ()
90 return val;
/**
 * Maps each word that the tokenizer treats specially (axis names,
 * operator names and node type names) to the corresponding
 * XPathParser token code.  consume_name () looks names up here.
 */
static final Map<String,Integer> keywords = new TreeMap<String,Integer> ();
static
{
  // Integer.valueOf is used instead of the Integer constructor so that
  // cached boxes can be shared rather than allocating one per entry.

  // Axis names
  keywords.put ("ancestor", Integer.valueOf (XPathParser.ANCESTOR));
  keywords.put ("ancestor-or-self", Integer.valueOf (XPathParser.ANCESTOR_OR_SELF));
  keywords.put ("attribute", Integer.valueOf (XPathParser.ATTRIBUTE));
  keywords.put ("child", Integer.valueOf (XPathParser.CHILD));
  keywords.put ("descendant", Integer.valueOf (XPathParser.DESCENDANT));
  keywords.put ("descendant-or-self", Integer.valueOf (XPathParser.DESCENDANT_OR_SELF));
  keywords.put ("following", Integer.valueOf (XPathParser.FOLLOWING));
  keywords.put ("following-sibling", Integer.valueOf (XPathParser.FOLLOWING_SIBLING));
  keywords.put ("namespace", Integer.valueOf (XPathParser.NAMESPACE));
  keywords.put ("parent", Integer.valueOf (XPathParser.PARENT));
  keywords.put ("preceding", Integer.valueOf (XPathParser.PRECEDING));
  keywords.put ("preceding-sibling", Integer.valueOf (XPathParser.PRECEDING_SIBLING));
  keywords.put ("self", Integer.valueOf (XPathParser.SELF));

  // Operator names
  keywords.put ("div", Integer.valueOf (XPathParser.DIV));
  keywords.put ("mod", Integer.valueOf (XPathParser.MOD));
  keywords.put ("or", Integer.valueOf (XPathParser.OR));
  keywords.put ("and", Integer.valueOf (XPathParser.AND));

  // Node type names
  keywords.put ("comment", Integer.valueOf (XPathParser.COMMENT));
  keywords.put ("processing-instruction", Integer.valueOf (XPathParser.PROCESSING_INSTRUCTION));
  keywords.put ("text", Integer.valueOf (XPathParser.TEXT));
  keywords.put ("node", Integer.valueOf (XPathParser.NODE));
}
121 Reader in;
122 XPathToken token;
123 XPathToken lastToken;
125 public XPathTokenizer (String expr)
127 this (new StringReader (expr));
130 XPathTokenizer (Reader in)
132 this.in = in.markSupported () ? in : new BufferedReader (in);
135 /* Begin ANTLR specific *
137 public Token nextToken ()
138 throws TokenStreamException
142 if (!advance ())
144 throw new TokenStreamException ("eof");
146 token ();
147 return token;
149 catch (IOException e)
151 throw new TokenStreamIOException (e);
155 * End ANTLR specific */
157 public boolean advance ()
158 throws IOException
160 lastToken = token;
161 int c = in.read ();
162 switch (c)
164 case -1: // eof
165 return false;
166 case 0x20:
167 case 0x09:
168 case 0x0d:
169 case 0x0a: // skip whitespace
170 return advance ();
171 case 0x22: // "
172 case 0x27: // '
173 token = consume_literal (c);
174 break;
175 case 0x28: // (
176 token = new XPathToken (XPathParser.LP);
177 break;
178 case 0x29: // )
179 token = new XPathToken (XPathParser.RP);
180 break;
181 case 0x5b: // [
182 token = new XPathToken (XPathParser.LB);
183 break;
184 case 0x5d: // ]
185 token = new XPathToken (XPathParser.RB);
186 break;
187 case 0x2c: // ,
188 token = new XPathToken (XPathParser.COMMA);
189 break;
190 case 0x7c: // |
191 token = new XPathToken (XPathParser.PIPE);
192 break;
193 case 0x2f: // /
194 in.mark (1);
195 int d1 = in.read ();
196 if (d1 == 0x2f)
198 token = new XPathToken (XPathParser.DOUBLE_SLASH);
200 else
202 in.reset ();
203 token = new XPathToken (XPathParser.SLASH);
205 break;
206 case 0x3d: // =
207 token = new XPathToken (XPathParser.EQ);
208 break;
209 case 0x21: // !
210 in.mark (1);
211 int d2 = in.read ();
212 if (d2 == 0x3d) // =
214 token = new XPathToken (XPathParser.NE);
216 else
218 in.reset ();
219 token = new XPathToken (XPathParser.yyErrorCode);
221 break;
222 case 0x3e: // >
223 in.mark (1);
224 int d3 = in.read ();
225 if (d3 == 0x3d) // =
227 token = new XPathToken (XPathParser.GTE);
229 else
231 in.reset ();
232 token = new XPathToken (XPathParser.GT);
234 break;
235 case 0x3c: // <
236 in.mark (1);
237 int d4 = in.read ();
238 if (d4 == 0x3d) // =
240 token = new XPathToken (XPathParser.LTE);
242 else
244 in.reset ();
245 token = new XPathToken (XPathParser.LT);
247 break;
248 case 0x2b: // +
249 token = new XPathToken (XPathParser.PLUS);
250 break;
251 case 0x2d: // -
252 token = new XPathToken (XPathParser.MINUS);
253 break;
254 case 0x40: // @
255 token = new XPathToken (XPathParser.AT);
256 break;
257 case 0x2a: // *
258 token = new XPathToken (XPathParser.STAR);
259 break;
260 case 0x24: // $
261 token = new XPathToken (XPathParser.DOLLAR);
262 break;
263 case 0x3a: // :
264 in.mark (1);
265 int d5 = in.read ();
266 if (d5 == 0x3a)
268 token = new XPathToken (XPathParser.DOUBLE_COLON);
270 else
272 in.reset ();
273 token = new XPathToken (XPathParser.COLON);
275 break;
276 case 0x2e: // .
277 in.mark (1);
278 int d6 = in.read ();
279 if (d6 == 0x2e)
281 token = new XPathToken (XPathParser.DOUBLE_DOT);
283 else
285 in.reset ();
286 token = new XPathToken (XPathParser.DOT);
288 break;
289 default:
290 if (c >= 0x30 && c <= 0x39)
292 token = consume_digits (c);
294 else if (c == 0x5f || Character.isLetter ((char) c))
296 token = consume_name (c);
298 else
300 token = new XPathToken (XPathParser.yyErrorCode);
303 return true;
306 public int token ()
308 return token.type;
311 public Object value ()
313 return token.val;
316 XPathToken consume_literal (int delimiter)
317 throws IOException
319 CPStringBuilder buf = new CPStringBuilder ();
320 while (true)
322 int c = in.read ();
323 if (c == -1)
325 return new XPathToken (XPathParser.yyErrorCode);
327 else if (c == delimiter)
329 return new XPathToken (XPathParser.LITERAL, buf.toString ());
331 else
333 buf.append ((char) c);
338 XPathToken consume_digits (int c)
339 throws IOException
341 CPStringBuilder buf = new CPStringBuilder ();
342 buf.append ((char) c);
343 while (true)
345 in.mark (1);
346 c = in.read ();
347 if (c >= 0x30 && c <= 0x39)
349 buf.append ((char) c);
351 else
353 in.reset ();
354 return new XPathToken (XPathParser.DIGITS, buf.toString ());
359 XPathToken consume_name (int c)
360 throws IOException
362 CPStringBuilder buf = new CPStringBuilder ();
363 buf.append ((char) c);
364 while (true)
366 in.mark (1);
367 c = in.read ();
368 if (isNameChar (c))
370 buf.append ((char) c);
372 else
374 in.reset ();
375 String name = buf.toString ();
376 Integer keyword = (Integer) keywords.get (name);
377 if (keyword == null)
379 return new XPathToken (XPathParser.NAME, name);
381 else
383 int val = keyword.intValue ();
384 switch (val)
386 case XPathParser.NODE:
387 case XPathParser.COMMENT:
388 case XPathParser.TEXT:
389 case XPathParser.PROCESSING_INSTRUCTION:
390 // Consume subsequent (
391 in.mark (1);
394 c = in.read ();
396 while (c == 0x20 || c == 0x09);
397 if (c != 0x28)
399 in.reset ();
400 return new XPathToken (XPathParser.NAME, name);
402 break;
403 case XPathParser.CHILD:
404 case XPathParser.PARENT:
405 case XPathParser.SELF:
406 case XPathParser.DESCENDANT:
407 case XPathParser.ANCESTOR:
408 case XPathParser.DESCENDANT_OR_SELF:
409 case XPathParser.ANCESTOR_OR_SELF:
410 case XPathParser.ATTRIBUTE:
411 case XPathParser.NAMESPACE:
412 case XPathParser.FOLLOWING:
413 case XPathParser.FOLLOWING_SIBLING:
414 case XPathParser.PRECEDING:
415 case XPathParser.PRECEDING_SIBLING:
416 // Check that this is an axis specifier
417 in.mark(1);
420 c = in.read();
422 while (c == 0x20 || c == 0x09);
423 if (c == 0x3a)
425 c = in.read();
426 if (c == 0x3a)
428 in.reset();
429 return new XPathToken(val);
432 in.reset();
433 return new XPathToken(XPathParser.NAME, name);
434 case XPathParser.DIV:
435 case XPathParser.MOD:
436 // May be a name
437 if (lastToken == null)
439 return new XPathToken(XPathParser.NAME, name);
441 switch (lastToken.type)
443 case XPathParser.LP:
444 case XPathParser.LB:
445 case XPathParser.COMMA:
446 case XPathParser.PIPE:
447 case XPathParser.EQ:
448 case XPathParser.NE:
449 case XPathParser.GT:
450 case XPathParser.LT:
451 case XPathParser.GTE:
452 case XPathParser.LTE:
453 case XPathParser.PLUS:
454 case XPathParser.MINUS:
455 case XPathParser.STAR:
456 case XPathParser.AT:
457 case XPathParser.DOLLAR:
458 case XPathParser.COLON:
459 case XPathParser.DOUBLE_COLON:
460 case XPathParser.DIV:
461 case XPathParser.MOD:
462 case XPathParser.OR:
463 case XPathParser.AND:
464 case XPathParser.SLASH:
465 return new XPathToken(XPathParser.NAME, name);
467 break;
469 return new XPathToken (val);
475 boolean isNameChar (int c)
477 /* Name */
478 return (c == 0x5f
479 || c == 0x2d
480 || c == 0x2e
481 || (c >= 0x30 && c <= 0x39)
482 /* CombiningChar */
483 || (c >= 0x0300 && c <= 0x0345)
484 || (c >= 0x0360 && c <= 0x0361)
485 || (c >= 0x0483 && c <= 0x0486)
486 || (c >= 0x0591 && c <= 0x05A1)
487 || (c >= 0x05A3 && c <= 0x05B9)
488 || (c >= 0x05BB && c <= 0x05BD)
489 || c == 0x05BF
490 || (c >= 0x05C1 && c <= 0x05C2)
491 || c == 0x05C4
492 || (c >= 0x064B && c <= 0x0652)
493 || c == 0x0670
494 || (c >= 0x06D6 && c <= 0x06DC)
495 || (c >= 0x06DD && c <= 0x06DF)
496 || (c >= 0x06E0 && c <= 0x06E4)
497 || (c >= 0x06E7 && c <= 0x06E8)
498 || (c >= 0x06EA && c <= 0x06ED)
499 || (c >= 0x0901 && c <= 0x0903)
500 || c == 0x093C
501 || (c >= 0x093E && c <= 0x094C)
502 || c == 0x094D
503 || (c >= 0x0951 && c <= 0x0954)
504 || (c >= 0x0962 && c <= 0x0963)
505 || (c >= 0x0981 && c <= 0x0983)
506 || c == 0x09BC
507 || c == 0x09BE
508 || c == 0x09BF
509 || (c >= 0x09C0 && c <= 0x09C4)
510 || (c >= 0x09C7 && c <= 0x09C8)
511 || (c >= 0x09CB && c <= 0x09CD)
512 || c == 0x09D7
513 || (c >= 0x09E2 && c <= 0x09E3)
514 || c == 0x0A02
515 || c == 0x0A3C
516 || c == 0x0A3E
517 || c == 0x0A3F
518 || (c >= 0x0A40 && c <= 0x0A42)
519 || (c >= 0x0A47 && c <= 0x0A48)
520 || (c >= 0x0A4B && c <= 0x0A4D)
521 || (c >= 0x0A70 && c <= 0x0A71)
522 || (c >= 0x0A81 && c <= 0x0A83)
523 || c == 0x0ABC
524 || (c >= 0x0ABE && c <= 0x0AC5)
525 || (c >= 0x0AC7 && c <= 0x0AC9)
526 || (c >= 0x0ACB && c <= 0x0ACD)
527 || (c >= 0x0B01 && c <= 0x0B03)
528 || c == 0x0B3C
529 || (c >= 0x0B3E && c <= 0x0B43)
530 || (c >= 0x0B47 && c <= 0x0B48)
531 || (c >= 0x0B4B && c <= 0x0B4D)
532 || (c >= 0x0B56 && c <= 0x0B57)
533 || (c >= 0x0B82 && c <= 0x0B83)
534 || (c >= 0x0BBE && c <= 0x0BC2)
535 || (c >= 0x0BC6 && c <= 0x0BC8)
536 || (c >= 0x0BCA && c <= 0x0BCD)
537 || c == 0x0BD7
538 || (c >= 0x0C01 && c <= 0x0C03)
539 || (c >= 0x0C3E && c <= 0x0C44)
540 || (c >= 0x0C46 && c <= 0x0C48)
541 || (c >= 0x0C4A && c <= 0x0C4D)
542 || (c >= 0x0C55 && c <= 0x0C56)
543 || (c >= 0x0C82 && c <= 0x0C83)
544 || (c >= 0x0CBE && c <= 0x0CC4)
545 || (c >= 0x0CC6 && c <= 0x0CC8)
546 || (c >= 0x0CCA && c <= 0x0CCD)
547 || (c >= 0x0CD5 && c <= 0x0CD6)
548 || (c >= 0x0D02 && c <= 0x0D03)
549 || (c >= 0x0D3E && c <= 0x0D43)
550 || (c >= 0x0D46 && c <= 0x0D48)
551 || (c >= 0x0D4A && c <= 0x0D4D)
552 || c == 0x0D57
553 || c == 0x0E31
554 || (c >= 0x0E34 && c <= 0x0E3A)
555 || (c >= 0x0E47 && c <= 0x0E4E)
556 || c == 0x0EB1
557 || (c >= 0x0EB4 && c <= 0x0EB9)
558 || (c >= 0x0EBB && c <= 0x0EBC)
559 || (c >= 0x0EC8 && c <= 0x0ECD)
560 || (c >= 0x0F18 && c <= 0x0F19)
561 || c == 0x0F35
562 || c == 0x0F37
563 || c == 0x0F39
564 || c == 0x0F3E
565 || c == 0x0F3F
566 || (c >= 0x0F71 && c <= 0x0F84)
567 || (c >= 0x0F86 && c <= 0x0F8B)
568 || (c >= 0x0F90 && c <= 0x0F95)
569 || c == 0x0F97
570 || (c >= 0x0F99 && c <= 0x0FAD)
571 || (c >= 0x0FB1 && c <= 0x0FB7)
572 || c == 0x0FB9
573 || (c >= 0x20D0 && c <= 0x20DC)
574 || c == 0x20E1
575 || (c >= 0x302A && c <= 0x302F)
576 || c == 0x3099
577 || c == 0x309A
578 /* Extender */
579 || c == 0x00B7
580 || c == 0x02D0
581 || c == 0x02D1
582 || c == 0x0387
583 || c == 0x0640
584 || c == 0x0E46
585 || c == 0x0EC6
586 || c == 0x3005
587 || (c >= 0x3031 && c <= 0x3035)
588 || (c >= 0x309D && c <= 0x309E)
589 || (c >= 0x30FC && c <= 0x30FE)
590 /* Name */
591 || Character.isLetter ((char) c));