1 /* XPathTokenizer.java --
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu
.xml
.xpath
;
40 import gnu
.java
.lang
.CPStringBuilder
;
42 import java
.io
.BufferedReader
;
43 import java
.io
.IOException
;
44 import java
.io
.Reader
;
45 import java
.io
.StringReader
;
47 import java
.util
.TreeMap
;
/* Disabled ANTLR imports:
import antlr.TokenStream;
import antlr.TokenStreamException;
import antlr.TokenStreamIOException;*/
/**
 * XPath 1.0 expression tokenizer.
 *
 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 */
59 public class XPathTokenizer
60 implements XPathParser
.yyInput
61 //implements TokenStream
64 static class XPathToken
76 XPathToken (int type
, String val
)
83 public String
getText ()
88 public String
toString ()
/**
 * Reserved words recognised by the tokenizer, mapped to the corresponding
 * {@link XPathParser} token type code. Looked up by name once a complete
 * name has been read from the input.
 */
static final Map<String, Integer> keywords = new TreeMap<String, Integer>();

static
{
  // Integer.valueOf is used instead of the deprecated Integer(int)
  // constructor; it may also reuse cached instances for small values.

  // Axis names.
  keywords.put("ancestor", Integer.valueOf(XPathParser.ANCESTOR));
  keywords.put("ancestor-or-self",
               Integer.valueOf(XPathParser.ANCESTOR_OR_SELF));
  keywords.put("attribute", Integer.valueOf(XPathParser.ATTRIBUTE));
  keywords.put("child", Integer.valueOf(XPathParser.CHILD));
  keywords.put("descendant", Integer.valueOf(XPathParser.DESCENDANT));
  keywords.put("descendant-or-self",
               Integer.valueOf(XPathParser.DESCENDANT_OR_SELF));
  keywords.put("following", Integer.valueOf(XPathParser.FOLLOWING));
  keywords.put("following-sibling",
               Integer.valueOf(XPathParser.FOLLOWING_SIBLING));
  keywords.put("namespace", Integer.valueOf(XPathParser.NAMESPACE));
  keywords.put("parent", Integer.valueOf(XPathParser.PARENT));
  keywords.put("preceding", Integer.valueOf(XPathParser.PRECEDING));
  keywords.put("preceding-sibling",
               Integer.valueOf(XPathParser.PRECEDING_SIBLING));
  keywords.put("self", Integer.valueOf(XPathParser.SELF));

  // Operator names.
  keywords.put("div", Integer.valueOf(XPathParser.DIV));
  keywords.put("mod", Integer.valueOf(XPathParser.MOD));
  keywords.put("or", Integer.valueOf(XPathParser.OR));
  keywords.put("and", Integer.valueOf(XPathParser.AND));

  // Node type names.
  keywords.put("comment", Integer.valueOf(XPathParser.COMMENT));
  keywords.put("processing-instruction",
               Integer.valueOf(XPathParser.PROCESSING_INSTRUCTION));
  keywords.put("text", Integer.valueOf(XPathParser.TEXT));
  keywords.put("node", Integer.valueOf(XPathParser.NODE));
}
123 XPathToken lastToken
;
/**
 * Creates a tokenizer over the text of an XPath expression.
 *
 * @param expr the expression to tokenize
 */
public XPathTokenizer(String expr)
{
  this(new StringReader(expr));
}

/**
 * Creates a tokenizer that reads the expression from a character stream.
 * A reader that does not report mark support is wrapped in a
 * {@link BufferedReader}, which does.
 *
 * @param in the stream supplying the expression text
 */
XPathTokenizer(Reader in)
{
  if (in.markSupported())
    {
      this.in = in;
    }
  else
    {
      this.in = new BufferedReader(in);
    }
}
135 /* Begin ANTLR specific *
137 public Token nextToken ()
138 throws TokenStreamException
144 throw new TokenStreamException ("eof");
149 catch (IOException e)
151 throw new TokenStreamIOException (e);
155 * End ANTLR specific */
157 public boolean advance ()
169 case 0x0a: // skip whitespace
173 token
= consume_literal (c
);
176 token
= new XPathToken (XPathParser
.LP
);
179 token
= new XPathToken (XPathParser
.RP
);
182 token
= new XPathToken (XPathParser
.LB
);
185 token
= new XPathToken (XPathParser
.RB
);
188 token
= new XPathToken (XPathParser
.COMMA
);
191 token
= new XPathToken (XPathParser
.PIPE
);
198 token
= new XPathToken (XPathParser
.DOUBLE_SLASH
);
203 token
= new XPathToken (XPathParser
.SLASH
);
207 token
= new XPathToken (XPathParser
.EQ
);
214 token
= new XPathToken (XPathParser
.NE
);
219 token
= new XPathToken (XPathParser
.yyErrorCode
);
227 token
= new XPathToken (XPathParser
.GTE
);
232 token
= new XPathToken (XPathParser
.GT
);
240 token
= new XPathToken (XPathParser
.LTE
);
245 token
= new XPathToken (XPathParser
.LT
);
249 token
= new XPathToken (XPathParser
.PLUS
);
252 token
= new XPathToken (XPathParser
.MINUS
);
255 token
= new XPathToken (XPathParser
.AT
);
258 token
= new XPathToken (XPathParser
.STAR
);
261 token
= new XPathToken (XPathParser
.DOLLAR
);
268 token
= new XPathToken (XPathParser
.DOUBLE_COLON
);
273 token
= new XPathToken (XPathParser
.COLON
);
281 token
= new XPathToken (XPathParser
.DOUBLE_DOT
);
286 token
= new XPathToken (XPathParser
.DOT
);
290 if (c
>= 0x30 && c
<= 0x39)
292 token
= consume_digits (c
);
294 else if (c
== 0x5f || Character
.isLetter ((char) c
))
296 token
= consume_name (c
);
300 token
= new XPathToken (XPathParser
.yyErrorCode
);
311 public Object
value ()
// Builds a LITERAL token. Characters are appended to a buffer until one equal
// to `delimiter` is seen; the buffered text (delimiter excluded) then becomes
// the token value.
// NOTE(review): how each character `c` is obtained is not visible in this
// excerpt -- presumably read from the input reader; confirm.
316 XPathToken
consume_literal (int delimiter
)
// Collects the characters that make up the literal's text.
319 CPStringBuilder buf
= new CPStringBuilder ();
// Failure result: a token of type yyErrorCode with no value.
// NOTE(review): the guarding condition is not visible here -- presumably end
// of input before the closing delimiter; confirm.
325 return new XPathToken (XPathParser
.yyErrorCode
);
// Closing delimiter found: the literal is complete.
327 else if (c
== delimiter
)
329 return new XPathToken (XPathParser
.LITERAL
, buf
.toString ());
// Any other character is part of the literal's text.
333 buf
.append ((char) c
);
// Builds a DIGITS token whose value is the text accumulated in the buffer,
// starting with the character `c` passed in by the caller.
// NOTE(review): the loop and the handling of a non-digit character are not
// visible in this excerpt; confirm against the full file.
338 XPathToken
consume_digits (int c
)
341 CPStringBuilder buf
= new CPStringBuilder ();
// The first character is appended without being range-checked here.
342 buf
.append ((char) c
);
// 0x30-0x39 are the ASCII digits '0'-'9'; only these extend the token.
347 if (c
>= 0x30 && c
<= 0x39)
349 buf
.append ((char) c
);
354 return new XPathToken (XPathParser
.DIGITS
, buf
.toString ());
359 XPathToken
consume_name (int c
)
362 CPStringBuilder buf
= new CPStringBuilder ();
363 buf
.append ((char) c
);
370 buf
.append ((char) c
);
375 String name
= buf
.toString ();
376 Integer keyword
= (Integer
) keywords
.get (name
);
379 return new XPathToken (XPathParser
.NAME
, name
);
383 int val
= keyword
.intValue ();
386 case XPathParser
.NODE
:
387 case XPathParser
.COMMENT
:
388 case XPathParser
.TEXT
:
389 case XPathParser
.PROCESSING_INSTRUCTION
:
390 // Consume subsequent (
396 while (c
== 0x20 || c
== 0x09);
400 return new XPathToken (XPathParser
.NAME
, name
);
403 case XPathParser
.CHILD
:
404 case XPathParser
.PARENT
:
405 case XPathParser
.SELF
:
406 case XPathParser
.DESCENDANT
:
407 case XPathParser
.ANCESTOR
:
408 case XPathParser
.DESCENDANT_OR_SELF
:
409 case XPathParser
.ANCESTOR_OR_SELF
:
410 case XPathParser
.ATTRIBUTE
:
411 case XPathParser
.NAMESPACE
:
412 case XPathParser
.FOLLOWING
:
413 case XPathParser
.FOLLOWING_SIBLING
:
414 case XPathParser
.PRECEDING
:
415 case XPathParser
.PRECEDING_SIBLING
:
416 // Check that this is an axis specifier
422 while (c
== 0x20 || c
== 0x09);
429 return new XPathToken(val
);
433 return new XPathToken(XPathParser
.NAME
, name
);
434 case XPathParser
.DIV
:
435 case XPathParser
.MOD
:
437 if (lastToken
== null)
439 return new XPathToken(XPathParser
.NAME
, name
);
441 switch (lastToken
.type
)
445 case XPathParser
.COMMA
:
446 case XPathParser
.PIPE
:
451 case XPathParser
.GTE
:
452 case XPathParser
.LTE
:
453 case XPathParser
.PLUS
:
454 case XPathParser
.MINUS
:
455 case XPathParser
.STAR
:
457 case XPathParser
.DOLLAR
:
458 case XPathParser
.COLON
:
459 case XPathParser
.DOUBLE_COLON
:
460 case XPathParser
.DIV
:
461 case XPathParser
.MOD
:
463 case XPathParser
.AND
:
464 case XPathParser
.SLASH
:
465 return new XPathToken(XPathParser
.NAME
, name
);
469 return new XPathToken (val
);
475 boolean isNameChar (int c
)
481 || (c
>= 0x30 && c
<= 0x39)
483 || (c
>= 0x0300 && c
<= 0x0345)
484 || (c
>= 0x0360 && c
<= 0x0361)
485 || (c
>= 0x0483 && c
<= 0x0486)
486 || (c
>= 0x0591 && c
<= 0x05A1)
487 || (c
>= 0x05A3 && c
<= 0x05B9)
488 || (c
>= 0x05BB && c
<= 0x05BD)
490 || (c
>= 0x05C1 && c
<= 0x05C2)
492 || (c
>= 0x064B && c
<= 0x0652)
494 || (c
>= 0x06D6 && c
<= 0x06DC)
495 || (c
>= 0x06DD && c
<= 0x06DF)
496 || (c
>= 0x06E0 && c
<= 0x06E4)
497 || (c
>= 0x06E7 && c
<= 0x06E8)
498 || (c
>= 0x06EA && c
<= 0x06ED)
499 || (c
>= 0x0901 && c
<= 0x0903)
501 || (c
>= 0x093E && c
<= 0x094C)
503 || (c
>= 0x0951 && c
<= 0x0954)
504 || (c
>= 0x0962 && c
<= 0x0963)
505 || (c
>= 0x0981 && c
<= 0x0983)
509 || (c
>= 0x09C0 && c
<= 0x09C4)
510 || (c
>= 0x09C7 && c
<= 0x09C8)
511 || (c
>= 0x09CB && c
<= 0x09CD)
513 || (c
>= 0x09E2 && c
<= 0x09E3)
518 || (c
>= 0x0A40 && c
<= 0x0A42)
519 || (c
>= 0x0A47 && c
<= 0x0A48)
520 || (c
>= 0x0A4B && c
<= 0x0A4D)
521 || (c
>= 0x0A70 && c
<= 0x0A71)
522 || (c
>= 0x0A81 && c
<= 0x0A83)
524 || (c
>= 0x0ABE && c
<= 0x0AC5)
525 || (c
>= 0x0AC7 && c
<= 0x0AC9)
526 || (c
>= 0x0ACB && c
<= 0x0ACD)
527 || (c
>= 0x0B01 && c
<= 0x0B03)
529 || (c
>= 0x0B3E && c
<= 0x0B43)
530 || (c
>= 0x0B47 && c
<= 0x0B48)
531 || (c
>= 0x0B4B && c
<= 0x0B4D)
532 || (c
>= 0x0B56 && c
<= 0x0B57)
533 || (c
>= 0x0B82 && c
<= 0x0B83)
534 || (c
>= 0x0BBE && c
<= 0x0BC2)
535 || (c
>= 0x0BC6 && c
<= 0x0BC8)
536 || (c
>= 0x0BCA && c
<= 0x0BCD)
538 || (c
>= 0x0C01 && c
<= 0x0C03)
539 || (c
>= 0x0C3E && c
<= 0x0C44)
540 || (c
>= 0x0C46 && c
<= 0x0C48)
541 || (c
>= 0x0C4A && c
<= 0x0C4D)
542 || (c
>= 0x0C55 && c
<= 0x0C56)
543 || (c
>= 0x0C82 && c
<= 0x0C83)
544 || (c
>= 0x0CBE && c
<= 0x0CC4)
545 || (c
>= 0x0CC6 && c
<= 0x0CC8)
546 || (c
>= 0x0CCA && c
<= 0x0CCD)
547 || (c
>= 0x0CD5 && c
<= 0x0CD6)
548 || (c
>= 0x0D02 && c
<= 0x0D03)
549 || (c
>= 0x0D3E && c
<= 0x0D43)
550 || (c
>= 0x0D46 && c
<= 0x0D48)
551 || (c
>= 0x0D4A && c
<= 0x0D4D)
554 || (c
>= 0x0E34 && c
<= 0x0E3A)
555 || (c
>= 0x0E47 && c
<= 0x0E4E)
557 || (c
>= 0x0EB4 && c
<= 0x0EB9)
558 || (c
>= 0x0EBB && c
<= 0x0EBC)
559 || (c
>= 0x0EC8 && c
<= 0x0ECD)
560 || (c
>= 0x0F18 && c
<= 0x0F19)
566 || (c
>= 0x0F71 && c
<= 0x0F84)
567 || (c
>= 0x0F86 && c
<= 0x0F8B)
568 || (c
>= 0x0F90 && c
<= 0x0F95)
570 || (c
>= 0x0F99 && c
<= 0x0FAD)
571 || (c
>= 0x0FB1 && c
<= 0x0FB7)
573 || (c
>= 0x20D0 && c
<= 0x20DC)
575 || (c
>= 0x302A && c
<= 0x302F)
587 || (c
>= 0x3031 && c
<= 0x3035)
588 || (c
>= 0x309D && c
<= 0x309E)
589 || (c
>= 0x30FC && c
<= 0x30FE)
591 || Character
.isLetter ((char) c
));