2 // RELAX NG Compact Syntax parser
5 // Atsushi Enomoto <ginga@kit.hi-ho.ne.jp>
7 // (C)2003 Atsushi Enomoto
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 using System
.Collections
;
36 using Commons
.Xml
.Relaxng
;
38 namespace Commons
.Xml
.Relaxng
.Rnc
40 internal class RncTokenizer
: Commons
.Xml
.Relaxng
.Rnc
.yyParser
.yyInput
// Line number snapshot taken by advance () after each token is parsed; starts at 1.
53 int savedLineNumber
= 1;
// Column snapshot taken by advance () after each token is parsed.
54 int savedLinePosition
;
// Flag tested, cleared and set inside ReadChar ().
// NOTE(review): presumably defers the line-counter bump until the character
// following a newline is read -- confirm against the complete source.
55 bool nextIncrementLine
;
// Constructs a tokenizer over the given reader and remembers the base URI,
// which is later exposed through the BaseUri property.
// NOTE(review): only the baseUri assignment is visible in this excerpt; the
// storage of `source` is presumably on a line not shown here -- confirm.
58 public RncTokenizer (TextReader source
, string baseUri
)
61 this.baseUri
= baseUri
;
// Exposes the isElement field.
64 public bool IsElement
{
65 get { return isElement; }
// Getter for the line number recorded by the most recent advance () call
// (the property header is not visible in this excerpt).
69 get { return savedLineNumber; }
// Getter for the column recorded by the most recent advance () call
// (the property header is not visible in this excerpt).
73 get { return savedLinePosition; }
// Exposes the base URI that was passed to the constructor.
76 public string BaseUri
{
77 get { return baseUri; }
80 // jay interface implementation
// Parses the next token (non-backslashed mode) into currentToken, then
// snapshots the current line/column into savedLineNumber/savedLinePosition.
// Returns true while the token just read is not Token.EOF.
87 public bool advance ()
90 currentToken
= ParseToken (false);
91 savedLineNumber
= line
;
92 savedLinePosition
= column
;
93 return currentToken
!= Token
.EOF
;
// Part of the jay interface implementation.
// NOTE(review): the body is not visible in this excerpt; presumably returns
// the value associated with the current token -- confirm.
96 public object value ()
// Reads one character from `source` and, in the visible branches, folds it
// into `current` as a base-16 digit before recursing for the next character.
// `current` is the value accumulated so far (callers start with 0).
// NOTE(review): the branch conditions and the terminating case (what ends the
// recursion and what is returned) are not visible in this excerpt.
103 private int ReadEscapedHexNumber (int current
)
105 int i
= source
.Read ();
// Digit taken as an offset from '0'.
117 current
= current
* 16 + (i
- '0');
118 return ReadEscapedHexNumber (current
);
// Digit taken as an offset from 'A', plus 10.
125 current
= current
* 16 + (i
- 'A') + 10;
126 return ReadEscapedHexNumber (current
);
// Digit taken as an offset from 'a', plus 10.
133 current
= current
* 16 + (i
- 'a' + 10);
134 return ReadEscapedHexNumber (current
);
// Reads a character from `source`. The visible fragments also consume a run
// of 'x' characters and then either stash text in peekString or decode a hex
// number via ReadEscapedHexNumber (0).
// NOTE(review): presumably this implements the compact-syntax backslash-x
// escape; the conditions selecting each path are not visible -- confirm.
140 private int ReadFromStream ()
142 int ret
= source
.Read ();
145 ret
= source
.Read ();
// Keep reading while the character is 'x'.
152 tmp
= source
.Read ();
153 } while (tmp
== 'x');
// Rebuild the consumed run of 'x' characters, followed by the character that ended it.
155 peekString
= new string ('x', xcount
);
157 peekString
+= (char) tmp
;
160 ret
= ReadEscapedHexNumber (0);
// Stash the resulting character as a one-character peekString.
166 peekString
= new string ((char) ret
, 1);
// Fills peekChar with the next character: taken from the front of peekString
// when one is pending, otherwise obtained from ReadFromStream ().
// NOTE(review): any guard for an already-filled peekChar and the return
// statement are not visible in this excerpt.
170 private int PeekChar ()
173 if (peekString
!= null) {
174 peekChar
= peekString
[0];
// Drop the consumed character; peekString becomes null once exhausted.
175 peekString
= peekString
.Length
== 1 ?
176 null : peekString
.Substring (1);
179 peekChar
= ReadFromStream ();
// Produces the next character in `ret`: from the front of peekString when one
// is pending, otherwise from ReadFromStream (). Afterwards it tests, clears
// and sets the nextIncrementLine flag.
// NOTE(review): the first branch of the if/else chain, the line/column
// updates and the return statement are not visible in this excerpt.
185 private int ReadChar ()
192 else if (peekString
!= null) {
193 ret
= peekString
[0];
// Drop the consumed character; peekString becomes null once exhausted.
194 peekString
= peekString
.Length
== 1 ?
195 null : peekString
.Substring (1);
198 ret
= ReadFromStream ();
200 if (nextIncrementLine
) {
203 nextIncrementLine
= false;
209 nextIncrementLine
= true;
// Dispatches on the upcoming character obtained from PeekChar ().
// NOTE(review): the case labels are not visible in this excerpt; presumably
// whitespace characters are consumed until a non-whitespace one is seen -- confirm.
219 private void SkipWhitespaces ()
222 switch (PeekChar ()) {
// Scratch character buffer (initially 30 chars) from which ReadQuoted,
// ReadTripleQuoted and ReadOneName build their result strings. When it is
// full, AppendNameChar and ReadOneName allocate an array of twice the size
// and copy the contents over.
235 char [] nameBuffer
= new char [30];
// Reads a quoted literal into nameBuffer and returns it as a string.
// Throws RelaxngException for an unterminated literal or when
// XmlChar.IsInvalid reports the character as invalid.
// NOTE(review): the loop structure, the test against quoteChar and the
// condition guarding the "unterminated" throw are not visible in this excerpt.
237 private string ReadQuoted (char quoteChar
)
253 throw new RelaxngException ("Unterminated quoted literal.");
254 if (XmlChar
.IsInvalid (c
))
255 throw new RelaxngException ("Invalid character in literal.");
256 AppendNameChar (c
, ref index
);
// The first `index` characters of nameBuffer form the literal.
261 return new string (nameBuffer
, 0, index
);
// Appends code point `c` to nameBuffer at position `index`, advancing `index`.
// When the buffer is full, an array of twice the size is allocated and the
// existing contents are copied into it.
// NOTE(review): the line installing `arr` as nameBuffer and the condition
// selecting the surrogate branch are not visible in this excerpt.
264 private void AppendNameChar (int c
, ref int index
)
266 if (nameBuffer
.Length
== index
) {
267 char [] arr
= new char [index
* 2];
268 Array
.Copy (nameBuffer
, arr
, index
);
// Split a supplementary code point into two UTF-16 code units: a high
// surrogate (0xD800 base) and a low surrogate (0xDC00 base), appended recursively.
272 AppendNameChar ((c
- 0x10000) / 0x400 + 0xD800, ref index
);
273 AppendNameChar ((c
- 0x10000) % 0x400 + 0xDC00, ref index
);
// Store the character as a single UTF-16 code unit.
276 nameBuffer
[index
++] = (char) c
;
// Reads a triple-quoted literal into nameBuffer and returns it as a string.
// Runs of one or two quote characters that do not complete the closing
// delimiter are appended to the literal as ordinary characters.
// Throws RelaxngException for an unterminated literal or when
// XmlChar.IsInvalid reports the character as invalid.
// NOTE(review): the loop structure and several branches are not visible in
// this excerpt; the description covers only the visible fragments.
279 private string ReadTripleQuoted (char quoteChar
)
// One quote not followed by another: keep it as literal text.
293 if ((c
= PeekChar ()) != quoteChar
) {
294 AppendNameChar (quoteChar
, ref index
);
299 if ((c
= PeekChar ()) == quoteChar
) {
// Two quote characters appended as literal text.
304 AppendNameChar (quoteChar
, ref index
);
305 AppendNameChar (quoteChar
, ref index
);
309 throw new RelaxngException ("Unterminated triple-quoted literal.");
310 if (XmlChar
.IsInvalid (c
))
311 throw new RelaxngException ("Invalid character in literal.");
312 AppendNameChar (c
, ref index
);
// The first `index` characters of nameBuffer form the literal.
317 return new string (nameBuffer
, 0, index
);
// Reads a single NCName into nameBuffer and returns it as a string.
// The first character must pass both XmlChar.IsFirstNameChar and
// XmlChar.IsNCNameChar, otherwise a RelaxngException is thrown.
// NOTE(review): the loop structure and the body of the non-NCName-character
// branch are not visible in this excerpt; presumably that branch ends the name.
320 private string ReadOneName ()
325 if (!XmlChar
.IsFirstNameChar (c
) || !XmlChar
.IsNCNameChar (c
))
326 throw new RelaxngException (String
.Format ("Invalid NCName start character: {0}", c
));
339 if (!XmlChar
.IsNCNameChar (c
)) {
// Buffer full: allocate an array of twice the size and copy the contents.
345 if (nameBuffer
.Length
== index
) {
346 char [] arr
= new char [index
* 2];
347 Array
.Copy (nameBuffer
, arr
, index
);
350 nameBuffer
[index
++] = (char) c
;
// The first `index` characters of nameBuffer form the name.
355 return new string (nameBuffer
, 0, index
);
// Reads a line of text directly from `source` via its ReadLine method.
// NOTE(review): any bookkeeping after the read and the return statement are
// not visible in this excerpt.
358 private string ReadLine ()
360 string s
= source
.ReadLine ();
366 private int ParseToken (bool backslashed
)
381 return Token
.OpenCurly
;
383 return Token
.CloseCurly
;
385 return Token
.OpenParen
;
387 return Token
.CloseParen
;
389 return Token
.OpenBracket
;
391 return Token
.CloseBracket
;
393 if (PeekChar () != '=')
396 return Token
.AndEquals
;
398 if (PeekChar () != '=')
401 return Token
.OrEquals
;
403 return Token
.Question
;
405 // See also ':' for NsName
406 return Token
.Asterisk
;
410 return ParseToken (true);
416 if (PeekChar () == '>') {
418 return Token
.TwoGreaters
;
423 // tokenValue = ReadLine ();
424 // return Token.Documentation;
426 return ParseToken (false);
429 if (PeekChar () != c
)
430 name
= ReadQuoted ((char) c
);
433 if (PeekChar () == c
) {
435 name
= ReadTripleQuoted ((char) c
);
439 int invidx
= XmlChar
.IndexOfInvalid (name
, true) ;
441 throw new RelaxngException (String
.Format ("Invalid XML character in compact syntax literal segment at {0:X}", (int) name
[invidx
]));
443 return Token
.LiteralSegment
;
445 if (!XmlChar
.IsNCNameChar (c
))
446 throw new RelaxngException ("Invalid NCName character.");
448 name
= ReadOneName ();
449 if (PeekChar () == ':') {
451 if (PeekChar () == '*') {
456 tokenValue
= name
+ ":" + ReadOneName ();
462 return Token
.QuotedIdentifier
;
466 return Token
.KeywordAttribute
;
469 return Token
.KeywordElement
;
471 return Token
.KeywordDatatypes
;
473 return Token
.KeywordDefault
;
475 return Token
.KeywordDiv
;
477 return Token
.KeywordEmpty
;
479 return Token
.KeywordExternal
;
481 return Token
.KeywordGrammar
;
483 return Token
.KeywordInclude
;
485 return Token
.KeywordInherit
;
487 return Token
.KeywordList
;
489 return Token
.KeywordMixed
;
491 return Token
.KeywordNamespace
;
493 return Token
.KeywordNotAllowed
;
495 return Token
.KeywordParent
;
497 return Token
.KeywordStart
;
499 return Token
.KeywordString
;
501 return Token
.KeywordText
;
503 return Token
.KeywordToken
;