5 // Atsushi Enomoto <atsushi@ximian.com>
7 // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System
.Collections
;
32 using System
.Collections
.Generic
;
34 using System
.Security
.Policy
;
36 using System
.Xml
.Query
;
37 using System
.Xml
.Schema
;
38 using System
.Xml
.XPath
;
39 using Mono
.Xml
.XQuery
;
40 using Mono
.Xml
.XPath2
;
44 namespace Mono
.Xml
.XPath2
.Parser
46 namespace Mono
.Xml
.XQuery
.Parser
49 // FIXME: make internal in the future
50 public class XQueryTokenizer
: yyParser
.yyInput
, IXmlLineInfo
54 bool nextIncrementLine
;
57 XmlNamespaceManager nsResolver
;
58 string defaultFunctionNamespace
= XQueryFunction
.Namespace
;
69 int lookAheadToken
= -1;
70 object lookAheadTokenValue
;
73 WhitespaceHandling ws
= WhitespaceHandling
.Arbitrary
;
74 ParseState state
= ParseState
.Default
;
77 char [] buffer
= new char [30];
80 public XQueryTokenizer (TextReader reader
)
84 stateStack
= new Stack ();
86 nsResolver
= new XmlNamespaceManager (new NameTable ());
87 nsResolver
.AddNamespace ("xs", XmlSchema
.Namespace
);
88 nsResolver
.AddNamespace ("xdt", InternalPool
.XdtNamespace
);
89 // FIXME: Are they really predefined?
90 nsResolver
.AddNamespace ("xsi", XmlSchema
.InstanceNamespace
);
91 nsResolver
.AddNamespace ("fn", "http://www.w3.org/2003/11/xpath-functions");
92 nsResolver
.AddNamespace ("local", "http://www.w3.org/2003/11/xquery-local-functions");
95 internal IXmlNamespaceResolver NSResolver
{
96 get { return nsResolver; }
99 internal string DefaultFunctionNamespace
{
100 get { return defaultFunctionNamespace; }
101 set { defaultFunctionNamespace = value; }
104 public void AddNamespace (string prefix
, string ns
)
106 nsResolver
.AddNamespace (prefix
, ns
);
109 public bool advance ()
111 if (currentToken
< 0)
113 if (lookAheadToken
>= 0) {
114 tokenValue
= lookAheadTokenValue
;
115 currentToken
= lookAheadToken
;
119 currentToken
= ParseToken ();
120 return currentToken
>= 0;
128 public object value ()
133 public bool HasLineInfo ()
138 public int LineNumber
{
142 public int LinePosition
{
143 get { return column; }
146 internal WhitespaceHandling Space
{
151 internal ParseState State
{
152 get { return state; }
154 // Console.Error.WriteLine ("**** eno **** state transition from {0} to {1}, stack count = {2}", state, value, stateStack.Count);
155 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
160 internal void PushState (ParseState newState
)
162 stateStack
.Push (newState
);
163 // Console.Error.WriteLine ("**** eno **** state pushed {0}, added stack count = {1}", newState, stateStack.Count);
164 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
167 internal void PopState ()
169 if (stateStack
.Count
== 0)
170 throw Error ("Internal state transition error. State stack is empty.");
171 state
= (ParseState
) stateStack
.Pop ();
172 // Console.Error.WriteLine ("**** eno **** state pop, now as {0}, stack count = {1}", state, stateStack.Count);
173 //foreach (ParseState ps in stateStack.ToArray ()) Console.Error.WriteLine ("***** eno ***** " + ps);
176 private XmlQueryCompileException
Error (string message
)
178 return new XmlQueryCompileException (message
, this, null, null);
181 private int ParseToken ()
186 case ParseState
.StartTag
:
194 case WhitespaceHandling.Arbitrary:
197 case WhitespaceHandling.Explicit:
198 if (!XmlChar.IsWhitespace (PeekChar ()))
199 throw Error ("Whitespace is required.");
200 goto case WhitespaceHandling.Arbitrary;
208 // FIXME: consider DOUBLE_LITERAL
209 if (Char
.IsNumber ((char) c
)) {
210 tokenValue
= ReadDecimal (false);
211 return Token
.DECIMAL_LITERAL
;
215 case ParseState
.OccurenceIndicator
:
216 return ParseOccurenceIndicator ();
217 case ParseState
.XmlPIContent
:
218 return ParseXmlPIContent ();
219 case ParseState
.XmlComment
:
220 return ParseXmlCommentContent ();
221 case ParseState
.ElementContent
:
222 return ParseElementContent ();
223 case ParseState
.StartTag
:
224 return ParseStartTag ();
225 case ParseState
.QuotAttributeContent
:
226 return ParseAttributeContent ('"');
227 case ParseState
.AposAttributeContent
:
228 return ParseAttributeContent ('\'');
230 return ParseDefault ();
234 private int ParseXQueryComment ()
239 throw Error ("Unexpected end of query text inside XML processing instruction content");
241 if (PeekChar () == ')') {
243 tokenValue
= CreateValueString ();
244 return Token
.XML_PI_TO_END
;
250 AddValueChar ((char) c
);
254 private int ParseXmlPIContent ()
259 throw Error ("Unexpected end of query text inside XML processing instruction content");
261 if (PeekChar () == '>') {
263 tokenValue
= CreateValueString ();
264 return Token
.XML_PI_TO_END
;
270 AddValueChar ((char) c
);
274 private int ParseXmlCommentContent ()
276 // FIXME: handle ---> correctly
280 throw Error ("Unexpected end of query text inside XML comment content");
282 if (PeekChar () == '-') {
284 if (PeekChar () == '>') {
285 tokenValue
= CreateValueString ();
286 return Token
.XML_COMMENT_TO_END
;
296 AddValueChar ((char) c
);
300 private int ParseXmlCDataContent ()
302 // FIXME: handle ]]]> correctly
306 throw Error ("Unexpected end of query text inside XML CDATA section content");
309 if (PeekChar () == ']') {
311 if (PeekChar () == '>') {
312 tokenValue
= CreateValueString ();
313 return Token
.XML_CDATA_TO_END
;
323 AddValueChar ((char) c
);
327 private int ParseElementContent ()
332 throw Error ("Unexpected end of query text inside XML processing instruction content");
336 return ParseDefault ();
342 throw Error ("Unexpected end of query text inside XML processing instruction content");
346 ReadPredefinedEntity ();
349 tokenValue
+= CreateValueString ();
350 return Token
.ELEM_CONTENT_LITERAL
;
352 AddValueChar ((char) c
);
359 private void ReadPredefinedEntity ()
361 string token
= ReadOneToken ();
380 throw Error (String
.Format ("Unexpected general entity name: {0} .", token
));
384 // FIXME: not used as yet
385 private int ParseExtContent ()
387 // FIXME: handle :::) correctly
391 throw Error ("Unexpected end of query text inside external content");
394 if (PeekChar () == ':') {
396 if (PeekChar () == ')') {
397 tokenValue
= CreateValueString ();
398 return Token
.EXT_CONTENT
;
408 AddValueChar ((char) c
);
412 private int ParseOccurenceIndicator ()
414 state
= ParseState
.Operator
;
415 switch (PeekChar ()) {
418 return Token
.QUESTION
;
421 return Token
.ASTERISK
;
426 return ParseOperator ();
430 private int ParseStartTag ()
442 return Token
.GREATER
;
446 return Token
.EMPTY_TAG_CLOSE
;
448 // FIXME: there seems a bug in the spec that StartTag
449 // state must accept QName without heading space for
451 // if (!XmlChar.IsWhitespace (PeekChar ()))
452 // throw Error ("Whitespace is required.");
454 return ParseDefault (); // only QName is allowed here.
457 private int ParseAttributeContent (char closeChar
)
459 int t
= Token
.ATT_VALUE_LITERAL
;
463 throw Error ("Unexpected end of attribute value content.");
464 if (c
== closeChar
) {
467 if (c
== closeChar
) {
469 AddValueChar (closeChar
);
472 t
= closeChar
== '"' ? Token
.QUOT
: Token
.APOS
;
482 t
= Token
.OPEN_CURLY
;
485 AddValueChar ((char) ReadChar ());
487 if (t
!= Token
.ATT_VALUE_LITERAL
) {
488 if (bufferIndex
> 0) {
490 tokenValue
= CreateValueString ();
491 return Token
.ATT_VALUE_LITERAL
;
499 private int ParseOperator ()
502 return ParseDefault ();
505 private int ParseDefault ()
510 if (PeekChar () == '.') {
514 else if (Char
.IsNumber ((char) PeekChar ())) {
515 tokenValue
= ReadDecimal (true);
521 return Token
.SEMICOLON
;
523 if (PeekChar () == ':') {
525 if (PeekChar () == ':') {
527 return Token
.PRAGMA_OPEN
;
529 ParseXQueryComment ();
530 return ParseToken (); // start again
532 return Token
.OPEN_PAREN
;
534 return Token
.CLOSE_PAREN
;
536 switch (PeekChar ()) {
539 if (PeekChar () == ')') {
541 return Token
.PRAGMA_CLOSE
;
546 return Token
.CLOSE_PAREN_COLON
;
549 return Token
.COLON_EQUAL
;
553 return Token
.OPEN_BRACKET
;
555 return Token
.CLOSE_BRACKET
;
557 return Token
.OPEN_CURLY
;
559 return Token
.CLOSE_CURLY
;
563 tokenValue
= ReadQuoted ('\'');
564 return Token
.STRING_LITERAL
;
566 tokenValue
= ReadQuoted ('"');
567 return Token
.STRING_LITERAL
;
571 // only happens when state is ElementContent
572 // (otherwise it might be "/foo</bar")
573 if (state
== ParseState
.ElementContent
) {
574 switch ((char) PeekChar ()) {
577 return Token
.END_TAG_START
;
580 switch (PeekChar ()) {
583 if (ReadChar () != '-')
584 throw Error ("Invalid sequence of characters '<!-'.");
586 return Token
.XML_COMMENT_START
;
590 return Token
.XML_CDATA_START
;
592 throw Error ("Invalid sequence of characters '<!'.");
595 return Token
.XML_PI_START
;
601 switch (PeekChar ()) {
604 return Token
.LESSER2
;
607 return Token
.LESSER_EQUAL
;
611 switch (PeekChar ()) {
614 return Token
.GREATER2
;
617 return Token
.GREATER_EQUAL
;
619 return Token
.GREATER
;
623 if (PeekChar () == ':') {
626 tokenValue
= new XmlQualifiedName (ReadOneToken (), "*");
627 return Token
.WILD_PREFIX
;
629 return Token
.ASTERISK
;
635 // only happens when state is StartTag
636 // (otherwise it might be "/>$extvar")
637 if (state
== ParseState
.StartTag
&& PeekChar () == '>') {
639 return Token
.EMPTY_TAG_CLOSE
;
641 if (PeekChar () == '/') {
647 return Token
.QUESTION
;
654 string name
= ReadOneToken ();
657 bool validKeyword
= false;
660 case ParseState
.XmlSpaceDecl
:
663 return Token
.PRESERVE
;
668 case ParseState
.CloseKindTest
:
669 if (name
== "nillable")
670 return Token
.NILLABLE
;
672 case ParseState
.ExtKey
:
677 return Token
.EXTENSION
;
680 case ParseState
.KindTest
:
683 return Token
.CONTEXT
;
685 return Token
.ELEMENT
;
692 case ParseState
.ItemType
:
695 return Token
.ATTRIBUTE
;
697 return Token
.COMMENT
;
698 case "document-node":
699 return Token
.DOCUMENT_NODE
;
701 return Token
.ELEMENT
;
708 case "processing-instruction":
709 return Token
.PROCESSING_INSTRUCTION
;
714 case ParseState
.NamespaceKeyword
:
717 return Token
.DECLARE
;
719 return Token
.DEFAULT
;
721 return Token
.ELEMENT
;
723 return Token
.FUNCTION
;
725 return Token
.NAMESPACE
;
728 case ParseState
.OccurenceIndicator
:
729 case ParseState
.Operator
:
798 case ParseState
.Default
:
801 case "ancestor-or-self":
812 case "descendant-or-self":
814 case "document-node":
818 case "following-sibling":
832 case "preceding-sibling":
833 case "processing-instruction":
858 return Token
.VERSION
;
862 return Token
.EXTENSION
;
866 return Token
.NAMESPACE
;
868 return Token
.DECLARE
;
870 return Token
.XMLSPACE
;
872 return Token
.PRESERVE
;
876 return Token
.DEFAULT
;
878 return Token
.CONSTRUCTION
;
880 return Token
.ORDERING
;
882 return Token
.ORDERED
;
884 return Token
.UNORDERED
;
885 case "document-node":
886 return Token
.DOCUMENT_NODE
;
888 return Token
.DOCUMENT
;
890 return Token
.ELEMENT
;
892 return Token
.ATTRIBUTE
;
893 case "processing-instruction":
894 return Token
.PROCESSING_INSTRUCTION
;
896 return Token
.COMMENT
;
902 return Token
.FUNCTION
;
904 return Token
.COLLATION
;
906 return Token
.BASEURI
;
914 return Token
.VARIABLE
;
918 return Token
.EXTERNAL
;
920 return Token
.VALIDATION
;
944 return Token
.ASCENDING
;
946 return Token
.DESCENDING
;
950 return Token
.GREATEST
;
958 return Token
.SATISFIES
;
980 return Token
.INSTANCE
;
990 return Token
.TYPESWITCH
;
996 return Token
.CASTABLE
;
1008 return Token
.INTERSECT
;
1010 return Token
.EXCEPT
;
1012 return Token
.VALIDATE
;
1014 return Token
.CONTEXT
;
1016 return Token
.NILLABLE
;
1020 return Token
.GLOBAL
;
1026 return Token
.DESCENDANT
;
1029 case "descendant-or-self":
1030 return Token
.DESCENDANT_OR_SELF
;
1031 case "following-sibling":
1032 return Token
.FOLLOWING_SIBLING
;
1034 return Token
.FOLLOWING
;
1036 return Token
.PARENT
;
1038 return Token
.ANCESTOR
;
1040 return Token
.PRECEDING
;
1041 case "preceding-sibling":
1042 return Token
.PRECEDING_SIBLING
;
1043 case "ancestor-or-self":
1044 return Token
.ANCESTOR_OR_SELF
;
1049 case ParseState
.NamespaceDecl
:
1050 case ParseState
.NamespaceKeyword
:
1051 case ParseState
.XmlSpaceDecl
:
1052 case ParseState
.KindTestForPI
:
1053 case ParseState
.XmlPI
:
1054 return Token
.NCNAME
;
1057 if (PeekChar () == ':') {
1060 switch (PeekChar ()) {
1065 case '=': // ex. let foo:= ...
1067 tokenValue
= new XmlQualifiedName (name
, nsResolver
.DefaultNamespace
);
1068 lookAheadToken
= Token
.COLON_EQUAL
;
1071 name
= ReadOneToken ();
1075 string ns
= nsResolver
.LookupNamespace (prefixName
);
1077 throw Error (String
.Format ("Prefix '{0}' is not mapped to any namespace URI.", prefixName
));
1078 tokenValue
= new XmlQualifiedName (name
, ns
);
1080 return name
== "*" ? Token
.WILD_LOCALNAME
: Token
.QNAME
;
1082 tokenValue
= new XmlQualifiedName (name
);
1086 private int PeekChar ()
1089 peekChar
= source
.Read ();
1093 private int ReadChar ()
1096 if (peekChar
!= -1) {
1101 ret
= source
.Read ();
1103 if (nextIncrementLine
) {
1106 nextIncrementLine
= false;
1113 nextIncrementLine
= true;
1122 private void SkipWhitespaces ()
1125 switch (PeekChar ()) {
1138 private void AddValueChar (char c
)
1140 if (bufferIndex
== buffer
.Length
) {
1141 char [] newBuf
= new char [bufferIndex
* 2];
1142 Array
.Copy (buffer
, newBuf
, bufferIndex
);
1145 buffer
[bufferIndex
++] = c
;
1148 private string CreateValueString ()
1150 return new string (buffer
, 0, bufferIndex
);
1153 private void Expect (string expected
)
1155 for (int i
= 0; i
< expected
.Length
; i
++)
1156 if (ReadChar () != expected
[i
])
1157 throw Error (String
.Format ("Expected token '{0}' did not appear.", expected
));
1160 // TODO: parse three quoted
1161 private string ReadQuoted (char quoteChar
)
1166 int c
= ReadChar ();
1170 if (quoteChar
== '"')
1174 if (quoteChar
== '\'')
1178 AddValueChar ((char) c
);
1183 return CreateValueString ();
1186 private decimal ReadDecimal (bool floatingPoint
)
1191 int c
= PeekChar ();
1195 // FIXME: more complex
1196 else if (Char
.IsNumber ((char) c
) || c
== '.') {
1198 AddValueChar ((char) c
);
1204 string s
= (floatingPoint
? "." : "") + CreateValueString ();
1205 return decimal.Parse (s
);
1208 private string ReadOneToken ()
1213 int c
= PeekChar ();
1223 if (!IsTokenContinuable (c
)) {
1225 if (prefixName
!= null)
1226 throw new XmlQueryCompileException ("Invalid colon was found.");
1227 prefixName
= CreateValueString ();
1234 AddValueChar ((char) c
);
1239 return CreateValueString ();
1242 private bool IsTokenContinuable (int c
)
1250 return XmlChar
.IsNCNameChar (c
);
1255 public enum WhitespaceHandling
{
1261 public enum ParseState
{
1283 QuotAttributeContent
,
1284 AposAttributeContent
,