5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System
.Collections
;
32 using System
.Collections
.Generic
;
34 using System
.Security
.Policy
;
36 using System
.Xml
.Query
;
37 using System
.Xml
.Schema
;
38 using System
.Xml
.XPath
;
39 using Mono
.Xml
.XQuery
;
40 using Mono
.Xml
.XPath2
;
42 namespace Mono
.Xml
.XQuery
.Parser
44 // FIXME: make internal in the future
45 public class XQueryTokenizer
46 : Mono
.Xml
.XQuery
.Parser
.yyParser
.yyInput
, IXmlLineInfo
50 bool nextIncrementLine
;
53 XmlNamespaceManager nsResolver
;
54 string defaultFunctionNamespace
= XQueryFunction
.Namespace
;
65 int lookAheadToken
= -1;
66 object lookAheadTokenValue
;
69 WhitespaceHandling ws
= WhitespaceHandling
.Arbitrary
;
70 ParseState state
= ParseState
.Default
;
73 char [] buffer
= new char [30];
76 public XQueryTokenizer (TextReader reader
)
80 stateStack
= new Stack ();
82 nsResolver
= new XmlNamespaceManager (new NameTable ());
83 nsResolver
.AddNamespace ("xs", XmlSchema
.Namespace
);
84 nsResolver
.AddNamespace ("xdt", XmlSchema
.XdtNamespace
);
85 // FIXME: Are they really predefined?
86 nsResolver
.AddNamespace ("xsi", XmlSchema
.InstanceNamespace
);
87 nsResolver
.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
88 nsResolver
.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
91 internal IXmlNamespaceResolver NSResolver
{
92 get { return nsResolver; }
95 internal string DefaultFunctionNamespace
{
96 get { return defaultFunctionNamespace; }
97 set { defaultFunctionNamespace = value; }
100 public void AddNamespace (string prefix
, string ns
)
102 nsResolver
.AddNamespace (prefix
, ns
);
105 public bool advance ()
107 if (currentToken
< 0)
109 if (lookAheadToken
>= 0) {
110 tokenValue
= lookAheadTokenValue
;
111 currentToken
= lookAheadToken
;
115 currentToken
= ParseToken ();
116 return currentToken
>= 0;
124 public object value ()
129 public bool HasLineInfo ()
134 public int LineNumber
{
138 public int LinePosition
{
139 get { return column; }
142 internal WhitespaceHandling Space
{
147 internal ParseState State
{
148 get { return state; }
150 // Console.Error.WriteLine ("**** eno **** state transition from {0} to {1}, stack count = {2}", state, value, stateStack.Count);
151 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
156 internal void PushState (ParseState newState
)
158 stateStack
.Push (newState
);
159 // Console.Error.WriteLine ("**** eno **** state pushed {0}, added stack count = {1}", newState, stateStack.Count);
160 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
163 internal void PopState ()
165 if (stateStack
.Count
== 0)
166 throw Error ("Internal state transition error. State stack is empty.");
167 state
= (ParseState
) stateStack
.Pop ();
168 // Console.Error.WriteLine ("**** eno **** state pop, now as {0}, stack count = {1}", state, stateStack.Count);
169 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
172 private XmlQueryCompileException
Error (string message
)
174 return new XmlQueryCompileException (message
, this, null, null);
177 private int ParseToken ()
182 case ParseState
.StartTag
:
190 case WhitespaceHandling.Arbitrary:
193 case WhitespaceHandling.Explicit:
194 if (!XmlChar.IsWhitespace (PeekChar ()))
195 throw Error ("Whitespace is required.");
196 goto case WhitespaceHandling.Arbitrary;
204 // FIXME: consider DOUBLE_LITERAL
205 if (Char
.IsNumber ((char) c
)) {
206 tokenValue
= ReadDecimal (false);
207 return Token
.DECIMAL_LITERAL
;
211 case ParseState
.OccurenceIndicator
:
212 return ParseOccurenceIndicator ();
213 case ParseState
.XmlPIContent
:
214 return ParseXmlPIContent ();
215 case ParseState
.XmlComment
:
216 return ParseXmlCommentContent ();
217 case ParseState
.ElementContent
:
218 return ParseElementContent ();
219 case ParseState
.StartTag
:
220 return ParseStartTag ();
221 case ParseState
.QuotAttributeContent
:
222 return ParseAttributeContent ('"');
223 case ParseState
.AposAttributeContent
:
224 return ParseAttributeContent ('\'');
226 return ParseDefault ();
230 private int ParseXQueryComment ()
235 throw Error ("Unexpected end of query text inside XML processing instruction content");
237 if (PeekChar () == ')') {
239 tokenValue
= CreateValueString ();
240 return Token
.XML_PI_TO_END
;
246 AddValueChar ((char) c
);
250 private int ParseXmlPIContent ()
255 throw Error ("Unexpected end of query text inside XML processing instruction content");
257 if (PeekChar () == '>') {
259 tokenValue
= CreateValueString ();
260 return Token
.XML_PI_TO_END
;
266 AddValueChar ((char) c
);
270 private int ParseXmlCommentContent ()
272 // FIXME: handle ---> correctly
276 throw Error ("Unexpected end of query text inside XML comment content");
278 if (PeekChar () == '-') {
280 if (PeekChar () == '>') {
281 tokenValue
= CreateValueString ();
282 return Token
.XML_COMMENT_TO_END
;
292 AddValueChar ((char) c
);
296 private int ParseXmlCDataContent ()
298 // FIXME: handle ]]]> correctly
302 throw Error ("Unexpected end of query text inside XML CDATA section content");
305 if (PeekChar () == ']') {
307 if (PeekChar () == '>') {
308 tokenValue
= CreateValueString ();
309 return Token
.XML_CDATA_TO_END
;
319 AddValueChar ((char) c
);
323 private int ParseElementContent ()
328 throw Error ("Unexpected end of query text inside XML processing instruction content");
332 return ParseDefault ();
338 throw Error ("Unexpected end of query text inside XML processing instruction content");
342 ReadPredefinedEntity ();
345 tokenValue
+= CreateValueString ();
346 return Token
.ELEM_CONTENT_LITERAL
;
348 AddValueChar ((char) c
);
355 private void ReadPredefinedEntity ()
357 string token
= ReadOneToken ();
376 throw Error (String
.Format ("Unexpected general entity name: {0} .", token
));
380 // FIXME: not used as yet
381 private int ParseExtContent ()
383 // FIXME: handle :::) correctly
387 throw Error ("Unexpected end of query text inside external content");
390 if (PeekChar () == ':') {
392 if (PeekChar () == ')') {
393 tokenValue
= CreateValueString ();
394 return Token
.EXT_CONTENT
;
404 AddValueChar ((char) c
);
408 private int ParseOccurenceIndicator ()
410 state
= ParseState
.Operator
;
411 switch (PeekChar ()) {
414 return Token
.QUESTION
;
417 return Token
.ASTERISK
;
422 return ParseOperator ();
426 private int ParseStartTag ()
438 return Token
.GREATER
;
442 return Token
.EMPTY_TAG_CLOSE
;
444 // FIXME: there seems a bug in the spec that StartTag
445 // state must accept QName without heading space for
447 // if (!XmlChar.IsWhitespace (PeekChar ()))
448 // throw Error ("Whitespace is required.");
450 return ParseDefault (); // only QName is allowed here.
453 private int ParseAttributeContent (char closeChar
)
455 int t
= Token
.ATT_VALUE_LITERAL
;
459 throw Error ("Unexpected end of attribute value content.");
460 if (c
== closeChar
) {
463 if (c
== closeChar
) {
465 AddValueChar (closeChar
);
468 t
= closeChar
== '"' ? Token
.QUOT
: Token
.APOS
;
478 t
= Token
.OPEN_CURLY
;
481 AddValueChar ((char) ReadChar ());
483 if (t
!= Token
.ATT_VALUE_LITERAL
) {
484 if (bufferIndex
> 0) {
486 tokenValue
= CreateValueString ();
487 return Token
.ATT_VALUE_LITERAL
;
495 private int ParseOperator ()
498 return ParseDefault ();
501 private int ParseDefault ()
506 if (PeekChar () == '.') {
510 else if (Char
.IsNumber ((char) PeekChar ())) {
511 tokenValue
= ReadDecimal (true);
517 return Token
.SEMICOLON
;
519 if (PeekChar () == ':') {
521 if (PeekChar () == ':') {
523 return Token
.PRAGMA_OPEN
;
525 ParseXQueryComment ();
526 return ParseToken (); // start again
528 return Token
.OPEN_PAREN
;
530 return Token
.CLOSE_PAREN
;
532 switch (PeekChar ()) {
535 if (PeekChar () == ')') {
537 return Token
.PRAGMA_CLOSE
;
542 return Token
.CLOSE_PAREN_COLON
;
545 return Token
.COLON_EQUAL
;
549 return Token
.OPEN_BRACKET
;
551 return Token
.CLOSE_BRACKET
;
553 return Token
.OPEN_CURLY
;
555 return Token
.CLOSE_CURLY
;
559 tokenValue
= ReadQuoted ('\'');
560 return Token
.STRING_LITERAL
;
562 tokenValue
= ReadQuoted ('"');
563 return Token
.STRING_LITERAL
;
567 // only happens when state is ElementContent
568 // (otherwise it might be "/foo</bar")
569 if (state
== ParseState
.ElementContent
) {
570 switch ((char) PeekChar ()) {
573 return Token
.END_TAG_START
;
576 switch (PeekChar ()) {
579 if (ReadChar () != '-')
580 throw Error ("Invalid sequence of characters '<!-'.");
582 return Token
.XML_COMMENT_START
;
586 return Token
.XML_CDATA_START
;
588 throw Error ("Invalid sequence of characters '<!'.");
591 return Token
.XML_PI_START
;
597 switch (PeekChar ()) {
600 return Token
.LESSER2
;
603 return Token
.LESSER_EQUAL
;
607 switch (PeekChar ()) {
610 return Token
.GREATER2
;
613 return Token
.GREATER_EQUAL
;
615 return Token
.GREATER
;
619 if (PeekChar () == ':') {
622 tokenValue
= new XmlQualifiedName (ReadOneToken (), "*");
623 return Token
.WILD_PREFIX
;
625 return Token
.ASTERISK
;
631 // only happens when state is StartTag
632 // (otherwise it might be "/>$extvar")
633 if (state
== ParseState
.StartTag
&& PeekChar () == '>') {
635 return Token
.EMPTY_TAG_CLOSE
;
637 if (PeekChar () == '/') {
643 return Token
.QUESTION
;
650 string name
= ReadOneToken ();
653 bool validKeyword
= false;
656 case ParseState
.XmlSpaceDecl
:
659 return Token
.PRESERVE
;
664 case ParseState
.CloseKindTest
:
665 if (name
== "nillable")
666 return Token
.NILLABLE
;
668 case ParseState
.ExtKey
:
673 return Token
.EXTENSION
;
676 case ParseState
.KindTest
:
679 return Token
.CONTEXT
;
681 return Token
.ELEMENT
;
688 case ParseState
.ItemType
:
691 return Token
.ATTRIBUTE
;
693 return Token
.COMMENT
;
694 case "document-node":
695 return Token
.DOCUMENT_NODE
;
697 return Token
.ELEMENT
;
704 case "processing-instruction":
705 return Token
.PROCESSING_INSTRUCTION
;
710 case ParseState
.NamespaceKeyword
:
713 return Token
.DECLARE
;
715 return Token
.DEFAULT
;
717 return Token
.ELEMENT
;
719 return Token
.FUNCTION
;
721 return Token
.NAMESPACE
;
724 case ParseState
.OccurenceIndicator
:
725 case ParseState
.Operator
:
794 case ParseState
.Default
:
797 case "ancestor-or-self":
808 case "descendant-or-self":
810 case "document-node":
814 case "following-sibling":
828 case "preceding-sibling":
829 case "processing-instruction":
854 return Token
.VERSION
;
858 return Token
.EXTENSION
;
862 return Token
.NAMESPACE
;
864 return Token
.DECLARE
;
866 return Token
.XMLSPACE
;
868 return Token
.PRESERVE
;
872 return Token
.DEFAULT
;
874 return Token
.CONSTRUCTION
;
876 return Token
.ORDERING
;
878 return Token
.ORDERED
;
880 return Token
.UNORDERED
;
881 case "document-node":
882 return Token
.DOCUMENT_NODE
;
884 return Token
.DOCUMENT
;
886 return Token
.ELEMENT
;
888 return Token
.ATTRIBUTE
;
889 case "processing-instruction":
890 return Token
.PROCESSING_INSTRUCTION
;
892 return Token
.COMMENT
;
898 return Token
.FUNCTION
;
900 return Token
.COLLATION
;
902 return Token
.BASEURI
;
910 return Token
.VARIABLE
;
914 return Token
.EXTERNAL
;
916 return Token
.VALIDATION
;
940 return Token
.ASCENDING
;
942 return Token
.DESCENDING
;
946 return Token
.GREATEST
;
954 return Token
.SATISFIES
;
976 return Token
.INSTANCE
;
986 return Token
.TYPESWITCH
;
992 return Token
.CASTABLE
;
1004 return Token
.INTERSECT
;
1006 return Token
.EXCEPT
;
1008 return Token
.VALIDATE
;
1010 return Token
.CONTEXT
;
1012 return Token
.NILLABLE
;
1016 return Token
.GLOBAL
;
1022 return Token
.DESCENDANT
;
1025 case "descendant-or-self":
1026 return Token
.DESCENDANT_OR_SELF
;
1027 case "following-sibling":
1028 return Token
.FOLLOWING_SIBLING
;
1030 return Token
.FOLLOWING
;
1032 return Token
.PARENT
;
1034 return Token
.ANCESTOR
;
1036 return Token
.PRECEDING
;
1037 case "preceding-sibling":
1038 return Token
.PRECEDING_SIBLING
;
1039 case "ancestor-or-self":
1040 return Token
.ANCESTOR_OR_SELF
;
1045 case ParseState
.NamespaceDecl
:
1046 case ParseState
.NamespaceKeyword
:
1047 case ParseState
.XmlSpaceDecl
:
1048 case ParseState
.KindTestForPI
:
1049 case ParseState
.XmlPI
:
1050 return Token
.NCNAME
;
1053 if (PeekChar () == ':') {
1056 switch (PeekChar ()) {
1061 case '=': // ex. let foo:= ...
1063 tokenValue
= new XmlQualifiedName (name
, nsResolver
.DefaultNamespace
);
1064 lookAheadToken
= Token
.COLON_EQUAL
;
1067 name
= ReadOneToken ();
1071 string ns
= nsResolver
.LookupNamespace (prefixName
);
1073 throw Error (String
.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName
));
1074 tokenValue
= new XmlQualifiedName (name
, ns
);
1076 return name
== "*" ? Token
.WILD_LOCALNAME
: Token
.QNAME
;
1078 tokenValue
= new XmlQualifiedName (name
);
1082 private int PeekChar ()
1085 peekChar
= source
.Read ();
1089 private int ReadChar ()
1092 if (peekChar
!= -1) {
1097 ret
= source
.Read ();
1099 if (nextIncrementLine
) {
1102 nextIncrementLine
= false;
1109 nextIncrementLine
= true;
1118 private void SkipWhitespaces ()
1121 switch (PeekChar ()) {
1134 private void AddValueChar (char c
)
1136 if (bufferIndex
== buffer
.Length
) {
1137 char [] newBuf
= new char [bufferIndex
* 2];
1138 Array
.Copy (buffer
, newBuf
, bufferIndex
);
1141 buffer
[bufferIndex
++] = c
;
1144 private string CreateValueString ()
1146 return new string (buffer
, 0, bufferIndex
);
1149 private void Expect (string expected
)
1151 for (int i
= 0; i
< expected
.Length
; i
++)
1152 if (ReadChar () != expected
[i
])
1153 throw Error (String
.Format ("Expected token '{0}' did not appear.", expected
));
1156 // TODO: parse three quoted
1157 private string ReadQuoted (char quoteChar
)
1162 int c
= ReadChar ();
1166 if (quoteChar
== '"')
1170 if (quoteChar
== '\'')
1174 AddValueChar ((char) c
);
1179 return CreateValueString ();
1182 private decimal ReadDecimal (bool floatingPoint
)
1187 int c
= PeekChar ();
1191 // FIXME: more complex
1192 else if (Char
.IsNumber ((char) c
) || c
== '.') {
1194 AddValueChar ((char) c
);
1200 string s
= (floatingPoint
? "." : "") + CreateValueString ();
1201 return decimal.Parse (s
);
1204 private string ReadOneToken ()
1209 int c
= PeekChar ();
1219 if (!IsTokenContinuable (c
)) {
1221 if (prefixName
!= null)
1222 throw new XmlQueryCompileException ("Invalid colon was found.");
1223 prefixName
= CreateValueString ();
1230 AddValueChar ((char) c
);
1235 return CreateValueString ();
1238 private bool IsTokenContinuable (int c
)
1246 return XmlChar
.IsNCNameChar (c
);
1251 public enum WhitespaceHandling
{
1257 public enum ParseState
{
1279 QuotAttributeContent
,
1280 AposAttributeContent
,