flush
[mcs.git] / class / Mono.Cecil / Mono.Xml / SmallXmlParser.cs
blob176670884b7582674eecae9e398098d35d91162d
1 //
2 // SmallXmlParser.cs
3 //
4 // Author:
5 // Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // small xml parser that is mostly compatible with
33 using System;
34 using System.Collections;
35 using System.Globalization;
36 using System.IO;
37 using System.Text;
39 namespace Mono.Xml
// Content handler whose callbacks are all empty; usable wherever an
// IContentHandler is required but the parse events are of no interest.
internal sealed class DefaultHandler : SmallXmlParser.IContentHandler
{
    public void OnStartParsing (SmallXmlParser parser) { }

    public void OnEndParsing (SmallXmlParser parser) { }

    public void OnStartElement (string name, SmallXmlParser.IAttrList attrs) { }

    public void OnEndElement (string name) { }

    public void OnChars (string s) { }

    public void OnIgnorableWhitespace (string s) { }

    public void OnProcessingInstruction (string name, string text) { }
}
72 internal class SmallXmlParser
// Callbacks through which the parser reports what it reads.
public interface IContentHandler
{
    // Called by Parse () before any input is read.
    void OnStartParsing (SmallXmlParser parser);

    // Called by Parse () once the input is exhausted and every element
    // has been closed.
    void OnEndParsing (SmallXmlParser parser);

    // Called for a start tag or an empty-element tag. The parser clears
    // the attribute list right after this call returns, so values that
    // are needed later must be copied out here.
    void OnStartElement (string name, IAttrList attrs);

    // Called for an end tag, and immediately after OnStartElement for an
    // empty-element tag.
    void OnEndElement (string name);

    // Called for a processing instruction with its target name and the
    // text that follows the target.
    void OnProcessingInstruction (string name, string text);

    // Called with buffered character data.
    void OnChars (string text);

    // Called instead of OnChars when the buffered text was flagged by the
    // parser as whitespace only.
    void OnIgnorableWhitespace (string text);
}
// Read-only view of the attributes of the element being reported.
public interface IAttrList
{
    // Number of attributes in the list.
    int Length { get; }

    // True when the list holds no attributes.
    bool IsEmpty { get; }

    // Name of the attribute at position i.
    string GetName (int i);

    // Value of the attribute at position i.
    string GetValue (int i);

    // Value of the attribute called name.
    string GetValue (string name);

    // All attribute names, as an array.
    string [] Names { get; }

    // All attribute values, as an array.
    string [] Values { get; }
}
// IAttrList backed by two parallel lists: the name stored at index i
// belongs to the value stored at index i.
sealed class AttrListImpl : IAttrList
{
    ArrayList attrNames = new ArrayList ();
    ArrayList attrValues = new ArrayList ();

    public int Length {
        get { return attrNames.Count; }
    }

    public bool IsEmpty {
        get { return attrNames.Count == 0; }
    }

    public string GetName (int i)
    {
        return (string) attrNames [i];
    }

    public string GetValue (int i)
    {
        return (string) attrValues [i];
    }

    // Returns the value of the first attribute called name, or null
    // when there is no such attribute.
    public string GetValue (string name)
    {
        int position = attrNames.IndexOf (name);
        if (position < 0)
            return null;
        return (string) attrValues [position];
    }

    // A fresh array is built on every access.
    public string [] Names {
        get {
            string [] copy = new string [attrNames.Count];
            attrNames.CopyTo (copy);
            return copy;
        }
    }

    // A fresh array is built on every access.
    public string [] Values {
        get {
            string [] copy = new string [attrValues.Count];
            attrValues.CopyTo (copy);
            return copy;
        }
    }

    // Empties both lists so the instance can be reused.
    internal void Clear ()
    {
        attrValues.Clear ();
        attrNames.Clear ();
    }

    // Appends one name/value pair, keeping both lists the same length.
    internal void Add (string name, string value)
    {
        attrNames.Add (name);
        attrValues.Add (value);
    }
}
// Receiver of the parse callbacks; assigned by Parse ().
IContentHandler handler;

// Input being parsed; assigned by Parse ().
TextReader reader;

// Names of the elements that are currently open, innermost on top.
Stack elementNames = new Stack ();

// xmlSpace value recorded for each open element, parallel to elementNames.
Stack xmlSpaces = new Stack ();

// Value of the most recent xml:space attribute that is in effect.
string xmlSpace;

// Accumulates character data and attribute values before they are handed out.
StringBuilder buffer = new StringBuilder (200);

// Scratch storage used by ReadName (); replaced by a larger array on demand.
char [] nameBuffer = new char [30];

// Whether the content collected in buffer is still considered
// whitespace only; selects OnIgnorableWhitespace over OnChars.
bool isWhitespace;

// Attribute list reused for every start tag.
AttrListImpl attributes = new AttrListImpl ();

// Position used in error messages.
int line = 1;
int column;

// Line-feed marker used by Read () while it updates line and column.
bool resetColumn;

public SmallXmlParser ()
{
}
// Builds (does not throw) a parser exception for msg, stamped with the
// current line and column.
private Exception Error (string msg)
{
    Exception failure = new SmallXmlParserException (msg, line, column);
    return failure;
}
// Builds the exception reported when the input ends prematurely; the
// message lists the names of the elements that are still open.
private Exception UnexpectedEndError ()
{
    // COMPACT FRAMEWORK NOTE: CopyTo is not visible through the Stack
    // class, so the stack is accessed through ICollection.
    ICollection openElements = elementNames;
    string [] names = new string [openElements.Count];
    openElements.CopyTo (names, 0);
    string joined = String.Join (",", names);
    return Error (String.Format (
        "Unexpected end of stream. Element stack content is {0}", joined));
}
// Tells whether c may appear in an XML name. With start set, c is
// tested as the first character of the name, where '-', '.', digits,
// combining/enclosing marks and modifier letters are rejected.
private bool IsNameChar (char c, bool start)
{
    if (c == ':' || c == '_')
        return true;
    if (c == '-' || c == '.')
        return !start;

    // Individual characters and one range that are accepted regardless
    // of their Unicode category.
    if (c == '\u0559' || c == '\u06E5' || c == '\u06E6')
        return true;
    if (c >= '\u02BB' && c <= '\u02C1')
        return true;

    UnicodeCategory category = Char.GetUnicodeCategory (c);

    bool letterLike =
        category == UnicodeCategory.LowercaseLetter ||
        category == UnicodeCategory.UppercaseLetter ||
        category == UnicodeCategory.OtherLetter ||
        category == UnicodeCategory.TitlecaseLetter ||
        category == UnicodeCategory.LetterNumber;
    if (letterLike)
        return true;

    bool continuationOnly =
        category == UnicodeCategory.SpacingCombiningMark ||
        category == UnicodeCategory.EnclosingMark ||
        category == UnicodeCategory.NonSpacingMark ||
        category == UnicodeCategory.ModifierLetter ||
        category == UnicodeCategory.DecimalDigitNumber;
    return continuationOnly && !start;
}
// True when c is a space, carriage return, tab or line feed. Any other
// value, including the negative end-of-stream marker, yields false.
private bool IsWhitespace (int c)
{
    return c == ' ' || c == '\r' || c == '\t' || c == '\n';
}
// Consumes any run of whitespace at the current position; it is not an
// error if there is none.
public void SkipWhitespaces ()
{
    const bool whitespaceRequired = false;
    SkipWhitespaces (whitespaceRequired);
}
// Moves a run of whitespace from the input into buffer. If the run is
// followed by anything other than '<' or the end of the input, the
// buffered content is no longer treated as whitespace only.
private void HandleWhitespaces ()
{
    for (int next = Peek (); IsWhitespace (next); next = Peek ())
        buffer.Append ((char) Read ());

    int following = Peek ();
    bool atMarkupOrEnd = following == '<' || following < 0;
    if (!atMarkupOrEnd)
        isWhitespace = false;
}
// Consumes a run of spaces, carriage returns, tabs and line feeds.
// When expected is true, at least one such character must be present;
// otherwise a parser error is thrown.
public void SkipWhitespaces (bool expected)
{
    while (IsWhitespace (Peek ())) {
        Read ();
        // One whitespace character is enough to satisfy the requirement.
        expected = false;
    }
    if (expected)
        throw Error ("Whitespace is expected.");
}
// Returns what the underlying reader's Peek () reports: the next
// character without consuming it.
private int Peek ()
{
    int upcoming = reader.Peek ();
    return upcoming;
}
// Consumes one character from the reader and keeps line/column in step
// with it. Returns the character, or a negative value at end of stream.
//
// Columns are 1-based on every line: column starts at 0 and is advanced
// by each consumed character, so consuming a line feed resets it to 0
// (previously it was reset to 1, which made the first character of
// every line after the first report column 2). Reaching the end of the
// stream consumes nothing and therefore leaves the position untouched.
private int Read ()
{
    int i = reader.Read ();
    if (i < 0)
        return i;

    // resetColumn records whether the character just consumed ended a line.
    resetColumn = i == '\n';
    if (resetColumn) {
        line++;
        column = 0;
    }
    else
        column++;
    return i;
}
// Consumes the next character and verifies that it is c. Throws the
// unexpected-end error when the input is exhausted, and a parser error
// naming both characters when a different character is found.
public void Expect (int c)
{
    int actual = Read ();
    if (actual < 0)
        throw UnexpectedEndError ();
    if (actual == c)
        return;
    throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) actual));
}
// Reads characters up to and including the delimiter until, and returns
// the text before the delimiter. With handleReferences set, '&' starts
// a reference that is resolved through ReadReference ().
//
// The text is collected in the shared buffer, which is emptied before
// returning; anything already in the buffer on entry becomes part of
// the result. Throws the unexpected-end error if the input ends first.
private string ReadUntil (char until, bool handleReferences)
{
    for (;;) {
        if (Peek () < 0)
            throw UnexpectedEndError ();

        char current = (char) Read ();
        if (current == until) {
            string collected = buffer.ToString ();
            buffer.Length = 0;
            return collected;
        }

        if (current == '&' && handleReferences)
            ReadReference ();
        else
            buffer.Append (current);
    }
}
// Reads an XML name at the current position and returns it. The first
// character must be a valid name start character; reading stops in
// front of the first character that cannot be part of a name.
public string ReadName ()
{
    int first = Peek ();
    if (first < 0 || !IsNameChar ((char) first, true))
        throw Error ("XML name start character is expected.");

    int length = 0;
    while (true) {
        int next = Peek ();
        if (next < 0 || !IsNameChar ((char) next, false))
            break;

        if (length == nameBuffer.Length) {
            // Scratch array is full: continue in one twice as large.
            char [] larger = new char [length * 2];
            // COMPACT FRAMEWORK NOTE: Array.Copy(sourceArray, destinationArray, count) is not available.
            Array.Copy (nameBuffer, 0, larger, 0, length);
            nameBuffer = larger;
        }
        nameBuffer [length] = (char) next;
        length++;
        Read ();
    }

    if (length == 0)
        throw Error ("Valid XML name is expected.");
    return new string (nameBuffer, 0, length);
}
// Parses the whole of input, reporting everything that is read to
// handler.
//
// input:   source of the XML text; must not be null.
// handler: receiver of the parse callbacks; must not be null.
//
// Throws ArgumentNullException for a null argument and a parser
// exception for malformed or unsupported input (including elements
// that are still open when the input ends).
//
// The parser state is reset in a finally block, so the instance can be
// reused for another document even when this call fails.
public void Parse (TextReader input, IContentHandler handler)
{
    if (input == null)
        throw new ArgumentNullException ("input");
    if (handler == null)
        throw new ArgumentNullException ("handler");

    this.reader = input;
    this.handler = handler;

    try {
        handler.OnStartParsing (this);

        while (Peek () >= 0)
            ReadContent ();
        // Hand out text that was still buffered when the input ended.
        HandleBufferedContent ();
        if (elementNames.Count > 0)
            throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));

        handler.OnEndParsing (this);
    }
    finally {
        // Exceptions are created before this runs, so they still carry
        // the position and element stack of the failure.
        Cleanup ();
    }
}
// Returns the parser to its initial state: drops the handler and the
// reader, empties the stacks, the attribute list and the text buffer,
// and rewinds the position to line 1, column 0.
private void Cleanup ()
{
    handler = null;
    reader = null;

    line = 1;
    column = 0;

#if CF_1_0
    elementNames = new Stack ();
    xmlSpaces = new Stack ();
#else
    elementNames.Clear ();
    xmlSpaces.Clear ();
#endif
    attributes.Clear ();

    buffer.Length = 0;
    isWhitespace = false;
    xmlSpace = null;
}
// Reads one unit of content at the current position: leading
// whitespace followed by either one piece of markup (declaration,
// processing instruction, end tag or start tag) or a run of character
// data.
public void ReadContent ()
{
    if (IsWhitespace (Peek ())) {
        // Only content that begins with whitespace can end up being
        // reported as ignorable whitespace.
        if (buffer.Length == 0)
            isWhitespace = true;
        HandleWhitespaces ();
    }

    if (Peek () != '<') {
        ReadCharacters ();
        return;
    }

    Read (); // consume '<'
    switch (Peek ()) {
    case '!': // declarations
        ReadDeclaration ();
        return;
    case '?': // PIs
        ReadProcessingInstruction ();
        return;
    case '/': // end tags
        ReadEndTag ();
        return;
    default: // start tags (including empty tags)
        ReadStartTag ();
        return;
    }
}

// Handles markup that starts with "<!": a CDATA section or a comment.
// Document type declarations and anything else are rejected.
private void ReadDeclaration ()
{
    Read (); // consume '!'
    if (Peek () == '[') {
        Read ();
        if (ReadName () != "CDATA")
            throw Error ("Invalid declaration markup");
        Expect ('[');
        ReadCDATASection ();
        return;
    }
    if (Peek () == '-') {
        ReadComment ();
        return;
    }
    if (ReadName () != "DOCTYPE")
        throw Error ("Invalid declaration markup.");
    throw Error ("This parser does not support document type.");
}

// Handles "<?name text?>" and reports it to the handler.
private void ReadProcessingInstruction ()
{
    HandleBufferedContent ();
    Read (); // consume '?'
    string name = ReadName ();
    SkipWhitespaces ();

    // Collect text up to the closing "?>". ReadUntil consumes the '?'
    // it stops at; a '?' that is not followed by '>' belongs to the
    // text. The loop must also run when the text is empty or begins
    // with '?': skipping it in that case left the closing '?'
    // unconsumed, so Expect ('>') below rejected "<?name?>".
    string text = String.Empty;
    while (true) {
        text += ReadUntil ('?', false);
        if (Peek () == '>')
            break;
        text += "?";
    }

    handler.OnProcessingInstruction (name, text);
    Expect ('>');
}

// Handles "</name>": closes the innermost open element, which must
// carry the same name.
private void ReadEndTag ()
{
    HandleBufferedContent ();
    if (elementNames.Count == 0)
        throw UnexpectedEndError ();
    Read (); // consume '/'
    string name = ReadName ();
    SkipWhitespaces ();

    string expected = (string) elementNames.Pop ();
    xmlSpaces.Pop ();
    // Back in the enclosing element: its xml:space value applies again.
    if (xmlSpaces.Count > 0)
        xmlSpace = (string) xmlSpaces.Peek ();
    else
        xmlSpace = null;

    if (name != expected)
        throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
    handler.OnEndElement (name);
    Expect ('>');
}

// Handles "<name attr='value' ...>" and the empty-element form
// "<name ... />".
private void ReadStartTag ()
{
    HandleBufferedContent ();
    string name = ReadName ();
    while (Peek () != '>' && Peek () != '/')
        ReadAttribute (attributes);
    handler.OnStartElement (name, attributes);
    // The list is reused for the next start tag.
    attributes.Clear ();
    SkipWhitespaces ();

    if (Peek () == '/') {
        Read ();
        handler.OnEndElement (name);
        // The element is already closed, so an xml:space attribute it
        // carried must not stay in effect for what follows; restore
        // the enclosing element's value exactly as an end tag does.
        if (xmlSpaces.Count > 0)
            xmlSpace = (string) xmlSpaces.Peek ();
        else
            xmlSpace = null;
    }
    else {
        elementNames.Push (name);
        xmlSpaces.Push (xmlSpace);
    }
    Expect ('>');
}
// Hands the buffered text, if any, to the handler and empties the
// buffer. Text flagged as whitespace only goes to
// OnIgnorableWhitespace, all other text to OnChars.
private void HandleBufferedContent ()
{
    if (buffer.Length == 0)
        return;

    string pending = buffer.ToString ();
    if (isWhitespace)
        handler.OnIgnorableWhitespace (pending);
    else
        handler.OnChars (pending);

    isWhitespace = false;
    buffer.Length = 0;
}
// Appends character data to buffer until '<' or the end of the input
// is reached, resolving references on the way.
//
// The buffered content stops counting as whitespace only when this
// method actually contributes to it. Clearing the flag unconditionally
// meant that whitespace followed directly by the end of the input
// (for example the line break after the root element) was reported
// through OnChars instead of OnIgnorableWhitespace.
private void ReadCharacters ()
{
    while (true) {
        int i = Peek ();
        if (i < 0 || i == '<')
            return;

        isWhitespace = false;
        if (i == '&') {
            Read ();
            ReadReference ();
        }
        else
            buffer.Append ((char) Read ());
    }
}
// Resolves the reference whose '&' has just been consumed and appends
// the character it stands for to buffer. Supported are character
// references ("&#65;", "&#x41;") and the five predefined entities;
// any other entity name is an error. The terminating ';' is consumed.
private void ReadReference ()
{
    if (Peek () == '#') {
        // character reference
        Read ();
        int codePoint = ReadCharacterReference ();
        Expect (';');
        if (codePoint < 0x10000)
            buffer.Append ((char) codePoint);
        else {
            // Beyond the 16-bit range: store as a surrogate pair.
            int offset = codePoint - 0x10000;
            buffer.Append ((char) (0xD800 + (offset >> 10)));
            buffer.Append ((char) (0xDC00 + (offset & 0x3FF)));
        }
    } else {
        string name = ReadName ();
        Expect (';');
        switch (name) {
        case "amp":
            buffer.Append ('&');
            break;
        case "quot":
            buffer.Append ('"');
            break;
        case "apos":
            buffer.Append ('\'');
            break;
        case "lt":
            buffer.Append ('<');
            break;
        case "gt":
            buffer.Append ('>');
            break;
        default:
            throw Error ("General non-predefined entity reference is not supported in this parser.");
        }
    }
}

// Reads the numeric part of a character reference, i.e. what follows
// "&#": decimal digits, or 'x' and hexadecimal digits. Stops in front
// of the first character that is not a digit (normally ';', which is
// left for the caller) and returns the value read.
//
// Throws a parser error when there is no digit at all or when the
// value exceeds 0x10FFFF, the largest Unicode code point.
private int ReadCharacterReference ()
{
    bool hex = Peek () == 'x';
    if (hex)
        Read ();

    int n = 0;
    int digitCount = 0;
    for (int i = Peek (); i >= 0; i = Peek ()) {
        int digit;
        if ('0' <= i && i <= '9')
            digit = i - '0';
        else if (hex && 'A' <= i && i <= 'F')
            digit = i - 'A' + 10;
        else if (hex && 'a' <= i && i <= 'f')
            digit = i - 'a' + 10;
        else
            break;

        // '+' binds tighter than '<<', so the shift needs its own
        // parentheses; decimal digits scale by 10, not by 16.
        n = hex ? (n << 4) + digit : n * 10 + digit;
        // Checking on every digit also keeps n far from overflowing.
        if (n > 0x10FFFF)
            throw Error ("Character reference is out of range.");
        digitCount++;
        Read ();
    }

    if (digitCount == 0)
        throw Error ("Invalid character reference.");
    return n;
}
// Reads one attribute (name = "value" or name = 'value') preceded by
// mandatory whitespace and adds it to a. If the whitespace turns out to
// be followed by the end of the tag, nothing is added. The value of an
// xml:space attribute is additionally remembered in xmlSpace.
private void ReadAttribute (AttrListImpl a)
{
    SkipWhitespaces (true);
    int afterSpace = Peek ();
    if (afterSpace == '/' || afterSpace == '>')
        // came here just to spend trailing whitespaces
        return;

    string name = ReadName ();
    SkipWhitespaces ();
    Expect ('=');
    SkipWhitespaces ();

    // The value runs up to the next occurrence of its opening quote.
    int quote = Read ();
    if (quote != '\'' && quote != '"')
        throw Error ("Invalid attribute value markup.");
    string value = ReadUntil ((char) quote, true);

    if (name == "xml:space")
        xmlSpace = value;
    a.Add (name, value);
}
// Reads the body of a CDATA section (everything after "<![CDATA[") up
// to and including the closing "]]>" and appends the text to buffer.
// Throws the unexpected-end error if the input ends inside the section.
//
// Text contributed by a CDATA section is character data: if anything
// was appended, the buffer no longer counts as whitespace only.
// Without this, whitespace followed by a CDATA section was delivered
// to OnIgnorableWhitespace together with the CDATA text.
private void ReadCDATASection ()
{
    int lengthBefore = buffer.Length;
    // Number of consecutive ']' seen but not yet appended, because they
    // may turn out to be part of the terminator.
    int nBracket = 0;
    while (true) {
        if (Peek () < 0)
            throw UnexpectedEndError ();
        char c = (char) Read ();
        if (c == ']')
            nBracket++;
        else if (c == '>' && nBracket > 1) {
            // The last two ']' belong to "]]>"; earlier ones are text.
            for (int i = nBracket; i > 2; i--)
                buffer.Append (']');
            break;
        }
        else {
            // The pending ']' were ordinary text after all.
            for (int i = 0; i < nBracket; i++)
                buffer.Append (']');
            nBracket = 0;
            buffer.Append (c);
        }
    }
    if (buffer.Length > lengthBefore)
        isWhitespace = false;
}
// Skips a comment. On entry "<!" has been consumed and the input is
// positioned at the "--" that opens the comment; on return the closing
// "-->" has been consumed. The comment text is discarded.
//
// Throws the unexpected-end error when the input ends inside the
// comment (previously this case looped forever, because Read () keeps
// returning a negative value at the end of the stream), and a parser
// error when "--" occurs inside the comment.
private void ReadComment ()
{
    Expect ('-');
    Expect ('-');
    while (true) {
        int c = Read ();
        if (c < 0)
            throw UnexpectedEndError ();
        if (c != '-')
            continue;

        c = Read ();
        if (c < 0)
            throw UnexpectedEndError ();
        if (c != '-')
            continue;

        // "--" has been seen: only '>' may follow.
        c = Read ();
        if (c < 0)
            throw UnexpectedEndError ();
        if (c != '>')
            throw Error ("'--' is not allowed inside comment markup.");
        break;
    }
}
// Exception carrying the line and column that were passed in when it
// was created; both are also appended to the message.
internal sealed class SmallXmlParserException : SystemException
{
    int errorLine;
    int errorColumn;

    public SmallXmlParserException (string msg, int line, int column)
        : base (ComposeMessage (msg, line, column))
    {
        errorLine = line;
        errorColumn = column;
    }

    // Line given to the constructor.
    public int Line {
        get { return errorLine; }
    }

    // Column given to the constructor.
    public int Column {
        get { return errorColumn; }
    }

    // Produces the text "<msg>. At (<line>,<column>)".
    static string ComposeMessage (string msg, int line, int column)
    {
        return String.Format ("{0}. At ({1},{2})", msg, line, column);
    }
}