2010-04-07 Jb Evain <jbevain@novell.com>
[mcs.git] / class / System.XML / System.Xml / XmlChar.cs
blob299b887b2f246fcf5bd3aead122cfc028b2c2e07
1 // -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
2 //
3 // System.Xml.XmlChar.cs
4 //
5 // Author:
6 // Jason Diamond (jason@injektilo.org)
7 //
8 // (C) 2001 Jason Diamond http://injektilo.org/
9 //
12 // Permission is hereby granted, free of charge, to any person obtaining
13 // a copy of this software and associated documentation files (the
14 // "Software"), to deal in the Software without restriction, including
15 // without limitation the rights to use, copy, modify, merge, publish,
16 // distribute, sublicense, and/or sell copies of the Software, and to
17 // permit persons to whom the Software is furnished to do so, subject to
18 // the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be
21 // included in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 namespace System.Xml
34 internal class XmlChar
/// <summary>
/// The four characters this class treats as whitespace, in the order
/// space, line feed, tab, carriage return.
/// </summary>
public static readonly char [] WhitespaceChars = { ' ', '\n', '\t', '\r' };
/// <summary>
/// Tells whether a code point is one of the four whitespace characters:
/// tab (0x9), line feed (0xA), carriage return (0xD) or space (0x20).
/// </summary>
/// <param name="ch">The code point to test.</param>
/// <returns>true when <paramref name="ch"/> is whitespace; otherwise false.</returns>
public static bool IsWhitespace (int ch)
{
	switch (ch) {
	case 0x9:  // tab
	case 0xA:  // line feed
	case 0xD:  // carriage return
	case 0x20: // space
		return true;
	default:
		return false;
	}
}
/// <summary>
/// Tells whether every character of a string is whitespace.
/// </summary>
/// <param name="str">The string to scan; must not be null.</param>
/// <returns>
/// false as soon as a non-whitespace character is found; true otherwise,
/// which includes the empty string.
/// </returns>
public static bool IsWhitespace (string str)
{
	foreach (char c in str) {
		if (!IsWhitespace (c))
			return false;
	}

	return true;
}
/// <summary>
/// Finds the first character of a string that is not whitespace.
/// </summary>
/// <param name="str">The string to scan; must not be null.</param>
/// <returns>
/// The zero-based index of the first non-whitespace character, or -1 when
/// the string is empty or consists only of whitespace.
/// </returns>
public static int IndexOfNonWhitespace (string str)
{
	int pos = 0;
	while (pos < str.Length) {
		if (!IsWhitespace (str [pos]))
			return pos;
		pos++;
	}
	return -1;
}
/// <summary>
/// Tells whether a code point may start a name (it is applied to the
/// first character by <see cref="IsName"/> and <see cref="IsNCName"/>).
/// </summary>
/// <param name="ch">The code point to test.</param>
/// <returns>
/// true for ASCII letters and for any other value in 0..0xFFFF whose bit
/// is set in the first-name-character tables; false for everything else.
/// </returns>
public static bool IsFirstNameChar (int ch)
{
	// ASCII letters are accepted without consulting the tables.
	bool asciiLetter = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
	if (asciiLetter)
		return true;

	// The unsigned comparison rejects negative values as well as
	// anything above the 16-bit range covered by the tables.
	if ((uint) ch > 0xFFFF)
		return false;

	// High byte selects a page; each page is eight 32-bit words, so the
	// top three bits of the low byte pick the word and the bottom five
	// bits pick the bit inside it.
	int page = firstNamePages [ch >> 8];
	int word = (page << 3) + ((ch & 0xFF) >> 5);
	int mask = 1 << (ch & 0x1F);
	return (nameBitmap [word] & mask) != 0;
}
/// <summary>
/// Tells whether a code point is accepted as a character; this is the
/// exact negation of <see cref="IsInvalid"/>.
/// </summary>
/// <param name="ch">The code point to test.</param>
/// <returns>true when <see cref="IsInvalid"/> returns false for <paramref name="ch"/>.</returns>
public static bool IsValid (int ch)
{
	bool invalid = IsInvalid (ch);
	return !invalid;
}
/// <summary>
/// Tells whether a code point is rejected as a character.
/// </summary>
/// <remarks>
/// The accepted values are tab, line feed, carriage return,
/// 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF. Everything else
/// (other control characters, the surrogate range, 0xFFFE/0xFFFF,
/// negative values and values above 0x10FFFF) is invalid.
/// </remarks>
/// <param name="ch">The code point to test.</param>
/// <returns>true when <paramref name="ch"/> is outside the accepted ranges.</returns>
public static bool IsInvalid (int ch)
{
	bool accepted =
		ch == 0x9 || ch == 0xA || ch == 0xD ||
		(ch >= 0x20 && ch <= 0xD7FF) ||
		(ch >= 0xE000 && ch <= 0xFFFD) ||
		(ch >= 0x10000 && ch <= 0x10FFFF);

	return !accepted;
}
/// <summary>
/// Finds the first character of a string that <see cref="IsInvalid"/>
/// rejects.
/// </summary>
/// <param name="s">The string to scan; must not be null.</param>
/// <param name="allowSurrogate">
/// When true, a high surrogate (U+D800-U+DBFF) immediately followed by a
/// low surrogate (U+DC00-U+DFFF) is accepted and skipped as one unit.
/// When false, any surrogate is reported.
/// </param>
/// <returns>The zero-based index of the offending character, or -1 when there is none.</returns>
public static int IndexOfInvalid (string s, bool allowSurrogate)
{
	int count = s.Length;
	int pos = 0;

	while (pos < count) {
		char current = s [pos];
		if (!IsInvalid (current)) {
			pos++;
			continue;
		}

		// The only rejected character that may be tolerated is the
		// first half of a well-formed surrogate pair. The checks are
		// ordered so s [pos + 1] is read only when it exists.
		bool wellFormedPair = allowSurrogate
			&& pos + 1 != count
			&& current >= '\uD800' && current < '\uDC00'
			&& s [pos + 1] >= '\uDC00' && s [pos + 1] < '\uE000';
		if (!wellFormedPair)
			return pos;

		// Step over both halves of the pair.
		pos += 2;
	}

	return -1;
}
/// <summary>
/// Finds the first character in a slice of a character array that
/// <see cref="IsInvalid"/> rejects.
/// </summary>
/// <param name="s">The buffer to scan; must not be null.</param>
/// <param name="start">Index of the first character to examine.</param>
/// <param name="length">Number of characters to examine.</param>
/// <param name="allowSurrogate">
/// When true, a high surrogate (U+D800-U+DBFF) immediately followed,
/// inside the slice, by a low surrogate (U+DC00-U+DFFF) is accepted and
/// skipped as one unit. When false, any surrogate is reported.
/// </param>
/// <returns>
/// The index into <paramref name="s"/> of the offending character, or -1
/// when there is none.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="start"/> + <paramref name="length"/> exceeds the array length.
/// </exception>
public static int IndexOfInvalid (char [] s, int start, int length, bool allowSurrogate)
{
	int limit = start + length;
	if (limit > s.Length)
		throw new ArgumentOutOfRangeException ("length");

	int pos = start;
	while (pos < limit) {
		char current = s [pos];
		if (!IsInvalid (current)) {
			pos++;
			continue;
		}

		// Only the first half of a well-formed surrogate pair can be
		// tolerated; s [pos + 1] is read only when it lies in the slice.
		bool wellFormedPair = allowSurrogate
			&& pos + 1 != limit
			&& current >= '\uD800' && current < '\uDC00'
			&& s [pos + 1] >= '\uDC00' && s [pos + 1] < '\uE000';
		if (!wellFormedPair)
			return pos;

		// Step over both halves of the pair.
		pos += 2;
	}

	return -1;
}
/// <summary>
/// Tells whether a code point may appear inside a name (it is applied to
/// the characters checked by <see cref="IsName"/> and <see cref="IsNmToken"/>).
/// </summary>
/// <param name="ch">The code point to test.</param>
/// <returns>
/// true for ASCII letters and for any other value in 0..0xFFFF whose bit
/// is set in the name-character tables; false for everything else.
/// </returns>
public static bool IsNameChar (int ch)
{
	// ASCII letters are accepted without consulting the tables.
	bool asciiLetter = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
	if (asciiLetter)
		return true;

	// The unsigned comparison rejects negative values as well as
	// anything above the 16-bit range covered by the tables.
	if ((uint) ch > 0xFFFF)
		return false;

	// High byte selects a page of eight 32-bit words; the low byte
	// selects the word (top three bits) and the bit (bottom five bits).
	int page = namePages [ch >> 8];
	int word = (page << 3) + ((ch & 0xFF) >> 5);
	int mask = 1 << (ch & 0x1F);
	return (nameBitmap [word] & mask) != 0;
}
/// <summary>
/// Tells whether a code point may appear inside a non-colonized name:
/// the same table lookup as <see cref="IsNameChar"/>, except that ':' is
/// always rejected.
/// </summary>
/// <param name="ch">The code point to test.</param>
/// <returns>
/// true when <paramref name="ch"/> is in 0..0xFFFF, is not ':' and has
/// its bit set in the name-character tables; otherwise false.
/// </returns>
public static bool IsNCNameChar (int ch)
{
	if (ch < 0 || ch > 0xFFFF || ch == ':')
		return false;

	// High byte selects a page of eight 32-bit words; the low byte
	// selects the word (top three bits) and the bit (bottom five bits).
	int page = namePages [ch >> 8];
	int word = (page << 3) + ((ch & 0xFF) >> 5);
	int mask = 1 << (ch & 0x1F);
	return (nameBitmap [word] & mask) != 0;
}
/// <summary>
/// Tells whether a string is a name: non-empty, starting with a
/// character accepted by <see cref="IsFirstNameChar"/> and continuing
/// only with characters accepted by <see cref="IsNameChar"/>.
/// </summary>
/// <param name="str">The string to test; must not be null.</param>
/// <returns>true when the whole string satisfies those rules; otherwise false.</returns>
public static bool IsName (string str)
{
	int len = str.Length;
	if (len == 0 || !IsFirstNameChar (str [0]))
		return false;

	int idx = 1;
	while (idx < len) {
		if (!IsNameChar (str [idx]))
			return false;
		idx++;
	}

	return true;
}
/// <summary>
/// Tells whether a string is a non-colonized name: non-empty, starting
/// with a character accepted by <see cref="IsFirstNameChar"/>, and made
/// up entirely of characters accepted by <see cref="IsNCNameChar"/>.
/// </summary>
/// <param name="str">The string to test; must not be null.</param>
/// <returns>true when the whole string satisfies those rules; otherwise false.</returns>
public static bool IsNCName (string str)
{
	if (str.Length == 0)
		return false;

	if (!IsFirstNameChar (str [0]))
		return false;

	// The scan deliberately includes the first character as well:
	// IsFirstNameChar accepts ':', and only IsNCNameChar rejects it.
	foreach (char c in str) {
		if (!IsNCNameChar (c))
			return false;
	}

	return true;
}
/// <summary>
/// Tells whether a string is a name token: non-empty and made up
/// entirely of characters accepted by <see cref="IsNameChar"/>. Unlike
/// <see cref="IsName"/>, the first character gets no special treatment.
/// </summary>
/// <param name="str">The string to test; must not be null.</param>
/// <returns>true when the string is non-empty and every character is a name character.</returns>
public static bool IsNmToken (string str)
{
	if (str.Length == 0)
		return false;

	foreach (char c in str) {
		if (!IsNameChar (c))
			return false;
	}

	return true;
}
/// <summary>
/// Tells whether a code point is allowed in a public identifier literal:
/// space, carriage return, line feed, an ASCII letter or digit, or one of
/// the punctuation characters <c>-'()+,./:=?;!*#@$_%</c>.
/// </summary>
/// <param name="ch">The code point to test.</param>
/// <returns>true when <paramref name="ch"/> is a public identifier character; otherwise false.</returns>
public static bool IsPubidChar (int ch)
{
	// Every accepted character fits in 16 bits. Rejecting other values
	// up front matters because the (char) cast below keeps only the low
	// 16 bits: without this guard 0x1002D would be seen as '-' and
	// wrongly accepted.
	if (ch < 0 || ch > 0xFFFF)
		return false;

	// Whitespace except tab: space, carriage return, line feed.
	if (ch == 0x20 || ch == 0xD || ch == 0xA)
		return true;

	if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9'))
		return true;

	return "-'()+,./:=?;!*#@$_%".IndexOf ((char) ch) >= 0;
}
/// <summary>
/// Tells whether every character of a string is accepted by
/// <see cref="IsPubidChar"/>.
/// </summary>
/// <param name="str">The string to test; must not be null.</param>
/// <returns>
/// false as soon as a character is rejected; true otherwise, which
/// includes the empty string.
/// </returns>
public static bool IsPubid (string str)
{
	foreach (char c in str) {
		if (!IsPubidChar (c))
			return false;
	}

	return true;
}
203 // encodings (copied from XmlConstructs.cs)
/// <summary>
/// Checks that an encoding name is spelled with characters that are
/// legal in an IANA encoding name: an ASCII letter followed by any
/// number of ASCII letters, digits, '.', '_' or '-'.
/// This method does not verify that a decoder is available for the
/// encoding, only that the name is well formed.
/// </summary>
/// <param name="ianaEncoding">The encoding name to check; may be null.</param>
/// <returns>
/// true when the name is well formed; false when it is null, empty,
/// does not start with a letter, or contains any other character.
/// </returns>
public static bool IsValidIANAEncoding (String ianaEncoding)
{
	if (ianaEncoding == null || ianaEncoding.Length == 0)
		return false;

	char first = ianaEncoding [0];
	bool startsWithLetter = (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z');
	if (!startsWithLetter)
		return false;

	for (int pos = 1; pos < ianaEncoding.Length; pos++) {
		char c = ianaEncoding [pos];
		bool allowed =
			(c >= 'A' && c <= 'Z') ||
			(c >= 'a' && c <= 'z') ||
			(c >= '0' && c <= '9') ||
			c == '.' || c == '_' || c == '-';
		if (!allowed)
			return false;
	}

	return true;
}
/// <summary>
/// Maps the name of a predefined entity to the character it stands for.
/// </summary>
/// <param name="name">
/// The entity name without the surrounding '&amp;' and ';'
/// (one of "amp", "lt", "gt", "quot", "apos").
/// </param>
/// <returns>
/// The character value of the entity, or -1 when <paramref name="name"/>
/// is not one of the five recognised names (including when it is null).
/// </returns>
public static int GetPredefinedEntity (string name)
{
	if (name == "amp")
		return '&';
	if (name == "lt")
		return '<';
	if (name == "gt")
		return '>';
	if (name == "quot")
		return '"';
	if (name == "apos")
		return '\'';

	return -1;
}
// Page table used by IsFirstNameChar. It has 256 entries and is indexed by
// the high byte of a 16-bit character (ch >> 8). Each entry is a page
// number; multiplied by 8 it gives the offset in nameBitmap of the eight
// 32-bit words (256 bits) describing the characters that share that high
// byte.
258 static readonly byte [] firstNamePages =
260 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00,
261 0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
262 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
263 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
264 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
265 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
266 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
267 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
268 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
269 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
270 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
271 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
272 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
273 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
274 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
275 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
276 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
277 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
278 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
279 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
280 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
281 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
282 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
283 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
284 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
285 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
286 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
287 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
289 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
290 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
291 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
// Page table used by IsNameChar and IsNCNameChar. It has 256 entries and is
// indexed by the high byte of a 16-bit character (ch >> 8). Each entry is a
// page number; multiplied by 8 it gives the offset in nameBitmap of the
// eight 32-bit words (256 bits) describing the characters that share that
// high byte.
294 static readonly byte [] namePages =
296 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00,
297 0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
298 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
299 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
300 0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
301 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
303 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
304 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
305 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
306 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
307 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
308 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
309 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
310 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
311 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
312 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
313 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
314 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
315 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
316 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
317 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
318 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
319 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
320 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
321 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
322 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
323 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
324 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
325 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
326 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
327 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
// Bit table shared by IsFirstNameChar, IsNameChar and IsNCNameChar.
// It holds 320 words, i.e. 40 pages of eight 32-bit words (256 bits per
// page). A page number comes from firstNamePages or namePages; within the
// page, word ((ch & 0xFF) >> 5) and bit (ch & 0x1F) give the answer for the
// character, a set bit meaning "accepted".
// Page 0 (the first eight words) is all zeros and page 1 (the next eight)
// is all ones, so page-table entries 0x00 and 0x01 mean "no character in
// this range" and "every character in this range" respectively.
// NOTE(review): the layout resembles the name tables shipped with the expat
// XML parser - confirm the origin before regenerating or editing values.
330 static readonly uint [] nameBitmap =
332 0x00000000, 0x00000000, 0x00000000, 0x00000000,
333 0x00000000, 0x00000000, 0x00000000, 0x00000000,
334 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
335 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
336 0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE,
337 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
338 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF,
339 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF,
340 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
341 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000,
342 0x00000000, 0x00000000, 0x00000000, 0x00000000,
343 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
344 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
345 0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
346 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
347 0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF,
348 0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000,
349 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
350 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003,
351 0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003,
352 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
353 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001,
354 0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003,
355 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
356 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
357 0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003,
358 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
359 0x00000000, 0x00000000, 0x00000000, 0x00000000,
360 0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000,
361 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
362 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF,
363 0x00000000, 0x00000000, 0x00000000, 0x00000000,
364 0x00000000, 0x00000000, 0x00000000, 0x00000000,
365 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF,
366 0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB,
367 0x40000000, 0xF580C900, 0x00000007, 0x02010800,
368 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
369 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
370 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
371 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
372 0x00000000, 0x00004C40, 0x00000000, 0x00000000,
373 0x00000007, 0x00000000, 0x00000000, 0x00000000,
374 0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF,
375 0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF,
376 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000,
377 0x00000000, 0x00000000, 0x00000000, 0x00000000,
378 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
379 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
380 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
381 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
382 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE,
383 0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF,
384 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
385 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
386 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003,
387 0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
388 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
389 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
390 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
391 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
392 0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF,
393 0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF,
394 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF,
395 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF,
396 0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
397 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
398 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3,
399 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
400 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
401 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3,
402 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
403 0x00000000, 0x00000000, 0x00000000, 0x00000000,
404 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000,
405 0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000,
406 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF,
407 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000,
408 0x00000000, 0x00000000, 0x00000000, 0x00000000,
409 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
410 0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
411 0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF