1 /* XPathTokenizer.java --
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option) any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu
.xml
.xpath
;
40 import java
.io
.BufferedReader
;
41 import java
.io
.IOException
;
42 import java
.io
.Reader
;
43 import java
.io
.StringReader
;
45 import java
.util
.TreeMap
;
48 import antlr.TokenStream;
49 import antlr.TokenStreamException;
50 import antlr.TokenStreamIOException;*/
53 * XPath 1.0 expression tokenizer.
55 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
57 public class XPathTokenizer
58 implements XPathParser
.yyInput
59 //implements TokenStream
62 static class XPathToken
74 XPathToken (int type
, String val
)
81 public String
getText ()
86 public String
toString ()
93 static final Map keywords
= new TreeMap ();
96 keywords
.put ("ancestor", new Integer (XPathParser
.ANCESTOR
));
97 keywords
.put ("ancestor-or-self", new Integer (XPathParser
.ANCESTOR_OR_SELF
));
98 keywords
.put ("attribute", new Integer (XPathParser
.ATTRIBUTE
));
99 keywords
.put ("child", new Integer (XPathParser
.CHILD
));
100 keywords
.put ("descendant", new Integer (XPathParser
.DESCENDANT
));
101 keywords
.put ("descendant-or-self", new Integer (XPathParser
.DESCENDANT_OR_SELF
));
102 keywords
.put ("following", new Integer (XPathParser
.FOLLOWING
));
103 keywords
.put ("following-sibling", new Integer (XPathParser
.FOLLOWING_SIBLING
));
104 keywords
.put ("namespace", new Integer (XPathParser
.NAMESPACE
));
105 keywords
.put ("parent", new Integer (XPathParser
.PARENT
));
106 keywords
.put ("preceding", new Integer (XPathParser
.PRECEDING
));
107 keywords
.put ("preceding-sibling", new Integer (XPathParser
.PRECEDING_SIBLING
));
108 keywords
.put ("self", new Integer (XPathParser
.SELF
));
109 keywords
.put ("div", new Integer (XPathParser
.DIV
));
110 keywords
.put ("mod", new Integer (XPathParser
.MOD
));
111 keywords
.put ("or", new Integer (XPathParser
.OR
));
112 keywords
.put ("and", new Integer (XPathParser
.AND
));
113 keywords
.put ("comment", new Integer (XPathParser
.COMMENT
));
114 keywords
.put ("processing-instruction", new Integer (XPathParser
.PROCESSING_INSTRUCTION
));
115 keywords
.put ("text", new Integer (XPathParser
.TEXT
));
116 keywords
.put ("node", new Integer (XPathParser
.NODE
));
121 XPathToken lastToken
;
123 public XPathTokenizer (String expr
)
125 this (new StringReader (expr
));
128 XPathTokenizer (Reader in
)
130 this.in
= in
.markSupported () ? in
: new BufferedReader (in
);
133 /* Begin ANTLR specific *
135 public Token nextToken ()
136 throws TokenStreamException
142 throw new TokenStreamException ("eof");
147 catch (IOException e)
149 throw new TokenStreamIOException (e);
153 * End ANTLR specific */
155 public boolean advance ()
167 case 0x0a: // skip whitespace
171 token
= consume_literal (c
);
174 token
= new XPathToken (XPathParser
.LP
);
177 token
= new XPathToken (XPathParser
.RP
);
180 token
= new XPathToken (XPathParser
.LB
);
183 token
= new XPathToken (XPathParser
.RB
);
186 token
= new XPathToken (XPathParser
.COMMA
);
189 token
= new XPathToken (XPathParser
.PIPE
);
196 token
= new XPathToken (XPathParser
.DOUBLE_SLASH
);
201 token
= new XPathToken (XPathParser
.SLASH
);
205 token
= new XPathToken (XPathParser
.EQ
);
212 token
= new XPathToken (XPathParser
.NE
);
217 token
= new XPathToken (XPathParser
.yyErrorCode
);
225 token
= new XPathToken (XPathParser
.GTE
);
230 token
= new XPathToken (XPathParser
.GT
);
238 token
= new XPathToken (XPathParser
.LTE
);
243 token
= new XPathToken (XPathParser
.LT
);
247 token
= new XPathToken (XPathParser
.PLUS
);
250 token
= new XPathToken (XPathParser
.MINUS
);
253 token
= new XPathToken (XPathParser
.AT
);
256 token
= new XPathToken (XPathParser
.STAR
);
259 token
= new XPathToken (XPathParser
.DOLLAR
);
266 token
= new XPathToken (XPathParser
.DOUBLE_COLON
);
271 token
= new XPathToken (XPathParser
.COLON
);
279 token
= new XPathToken (XPathParser
.DOUBLE_DOT
);
284 token
= new XPathToken (XPathParser
.DOT
);
288 if (c
>= 0x30 && c
<= 0x39)
290 token
= consume_digits (c
);
292 else if (c
== 0x5f || Character
.isLetter ((char) c
))
294 token
= consume_name (c
);
298 token
= new XPathToken (XPathParser
.yyErrorCode
);
309 public Object
value ()
314 XPathToken
consume_literal (int delimiter
)
317 StringBuffer buf
= new StringBuffer ();
323 return new XPathToken (XPathParser
.yyErrorCode
);
325 else if (c
== delimiter
)
327 return new XPathToken (XPathParser
.LITERAL
, buf
.toString ());
331 buf
.append ((char) c
);
336 XPathToken
consume_digits (int c
)
339 StringBuffer buf
= new StringBuffer ();
340 buf
.append ((char) c
);
345 if (c
>= 0x30 && c
<= 0x39)
347 buf
.append ((char) c
);
352 return new XPathToken (XPathParser
.DIGITS
, buf
.toString ());
357 XPathToken
consume_name (int c
)
360 StringBuffer buf
= new StringBuffer ();
361 buf
.append ((char) c
);
368 buf
.append ((char) c
);
373 String name
= buf
.toString ();
374 Integer keyword
= (Integer
) keywords
.get (name
);
377 return new XPathToken (XPathParser
.NAME
, name
);
381 int val
= keyword
.intValue ();
384 case XPathParser
.NODE
:
385 case XPathParser
.COMMENT
:
386 case XPathParser
.TEXT
:
387 case XPathParser
.PROCESSING_INSTRUCTION
:
388 // Consume subsequent (
394 while (c
== 0x20 || c
== 0x09);
398 return new XPathToken (XPathParser
.NAME
, name
);
401 case XPathParser
.CHILD
:
402 case XPathParser
.PARENT
:
403 case XPathParser
.SELF
:
404 case XPathParser
.DESCENDANT
:
405 case XPathParser
.ANCESTOR
:
406 case XPathParser
.DESCENDANT_OR_SELF
:
407 case XPathParser
.ANCESTOR_OR_SELF
:
408 case XPathParser
.ATTRIBUTE
:
409 case XPathParser
.NAMESPACE
:
410 case XPathParser
.FOLLOWING
:
411 case XPathParser
.FOLLOWING_SIBLING
:
412 case XPathParser
.PRECEDING
:
413 case XPathParser
.PRECEDING_SIBLING
:
414 // Check that this is an axis specifier
420 while (c
== 0x20 || c
== 0x09);
427 return new XPathToken(val
);
431 return new XPathToken(XPathParser
.NAME
, name
);
432 case XPathParser
.DIV
:
433 case XPathParser
.MOD
:
435 if (lastToken
== null)
437 return new XPathToken(XPathParser
.NAME
, name
);
439 switch (lastToken
.type
)
443 case XPathParser
.COMMA
:
444 case XPathParser
.PIPE
:
449 case XPathParser
.GTE
:
450 case XPathParser
.LTE
:
451 case XPathParser
.PLUS
:
452 case XPathParser
.MINUS
:
453 case XPathParser
.STAR
:
455 case XPathParser
.DOLLAR
:
456 case XPathParser
.COLON
:
457 case XPathParser
.DOUBLE_COLON
:
458 case XPathParser
.DIV
:
459 case XPathParser
.MOD
:
461 case XPathParser
.AND
:
462 case XPathParser
.SLASH
:
463 return new XPathToken(XPathParser
.NAME
, name
);
467 return new XPathToken (val
);
473 boolean isNameChar (int c
)
479 || (c
>= 0x30 && c
<= 0x39)
481 || (c
>= 0x0300 && c
<= 0x0345)
482 || (c
>= 0x0360 && c
<= 0x0361)
483 || (c
>= 0x0483 && c
<= 0x0486)
484 || (c
>= 0x0591 && c
<= 0x05A1)
485 || (c
>= 0x05A3 && c
<= 0x05B9)
486 || (c
>= 0x05BB && c
<= 0x05BD)
488 || (c
>= 0x05C1 && c
<= 0x05C2)
490 || (c
>= 0x064B && c
<= 0x0652)
492 || (c
>= 0x06D6 && c
<= 0x06DC)
493 || (c
>= 0x06DD && c
<= 0x06DF)
494 || (c
>= 0x06E0 && c
<= 0x06E4)
495 || (c
>= 0x06E7 && c
<= 0x06E8)
496 || (c
>= 0x06EA && c
<= 0x06ED)
497 || (c
>= 0x0901 && c
<= 0x0903)
499 || (c
>= 0x093E && c
<= 0x094C)
501 || (c
>= 0x0951 && c
<= 0x0954)
502 || (c
>= 0x0962 && c
<= 0x0963)
503 || (c
>= 0x0981 && c
<= 0x0983)
507 || (c
>= 0x09C0 && c
<= 0x09C4)
508 || (c
>= 0x09C7 && c
<= 0x09C8)
509 || (c
>= 0x09CB && c
<= 0x09CD)
511 || (c
>= 0x09E2 && c
<= 0x09E3)
516 || (c
>= 0x0A40 && c
<= 0x0A42)
517 || (c
>= 0x0A47 && c
<= 0x0A48)
518 || (c
>= 0x0A4B && c
<= 0x0A4D)
519 || (c
>= 0x0A70 && c
<= 0x0A71)
520 || (c
>= 0x0A81 && c
<= 0x0A83)
522 || (c
>= 0x0ABE && c
<= 0x0AC5)
523 || (c
>= 0x0AC7 && c
<= 0x0AC9)
524 || (c
>= 0x0ACB && c
<= 0x0ACD)
525 || (c
>= 0x0B01 && c
<= 0x0B03)
527 || (c
>= 0x0B3E && c
<= 0x0B43)
528 || (c
>= 0x0B47 && c
<= 0x0B48)
529 || (c
>= 0x0B4B && c
<= 0x0B4D)
530 || (c
>= 0x0B56 && c
<= 0x0B57)
531 || (c
>= 0x0B82 && c
<= 0x0B83)
532 || (c
>= 0x0BBE && c
<= 0x0BC2)
533 || (c
>= 0x0BC6 && c
<= 0x0BC8)
534 || (c
>= 0x0BCA && c
<= 0x0BCD)
536 || (c
>= 0x0C01 && c
<= 0x0C03)
537 || (c
>= 0x0C3E && c
<= 0x0C44)
538 || (c
>= 0x0C46 && c
<= 0x0C48)
539 || (c
>= 0x0C4A && c
<= 0x0C4D)
540 || (c
>= 0x0C55 && c
<= 0x0C56)
541 || (c
>= 0x0C82 && c
<= 0x0C83)
542 || (c
>= 0x0CBE && c
<= 0x0CC4)
543 || (c
>= 0x0CC6 && c
<= 0x0CC8)
544 || (c
>= 0x0CCA && c
<= 0x0CCD)
545 || (c
>= 0x0CD5 && c
<= 0x0CD6)
546 || (c
>= 0x0D02 && c
<= 0x0D03)
547 || (c
>= 0x0D3E && c
<= 0x0D43)
548 || (c
>= 0x0D46 && c
<= 0x0D48)
549 || (c
>= 0x0D4A && c
<= 0x0D4D)
552 || (c
>= 0x0E34 && c
<= 0x0E3A)
553 || (c
>= 0x0E47 && c
<= 0x0E4E)
555 || (c
>= 0x0EB4 && c
<= 0x0EB9)
556 || (c
>= 0x0EBB && c
<= 0x0EBC)
557 || (c
>= 0x0EC8 && c
<= 0x0ECD)
558 || (c
>= 0x0F18 && c
<= 0x0F19)
564 || (c
>= 0x0F71 && c
<= 0x0F84)
565 || (c
>= 0x0F86 && c
<= 0x0F8B)
566 || (c
>= 0x0F90 && c
<= 0x0F95)
568 || (c
>= 0x0F99 && c
<= 0x0FAD)
569 || (c
>= 0x0FB1 && c
<= 0x0FB7)
571 || (c
>= 0x20D0 && c
<= 0x20DC)
573 || (c
>= 0x302A && c
<= 0x302F)
585 || (c
>= 0x3031 && c
<= 0x3035)
586 || (c
>= 0x309D && c
<= 0x309E)
587 || (c
>= 0x30FC && c
<= 0x30FE)
589 || Character
.isLetter ((char) c
));