Fixed the licensing statements - I had erroneously marked the files as covered by...
[lwes-dotnet/github-mirror.git] / Org.Lwes / ESF / EsfParser.cs
blob9d932db4348f071defb0baa0520d39f653b1aaa4
1 //
2 // This file is part of the LWES .NET Binding (LWES.net)
3 //
4 // COPYRIGHT (C) 2009, Phillip Clark (cerebralkungfu[at*g mail[dot*com)
5 // original .NET implementation
6 //
7 // LWES.net is free software: you can redistribute it and/or modify
8 // it under the terms of the Lesser GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
12 // LWES.net is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // You should have received a copy of the Lesser GNU General Public License
18 // along with LWES.net. If not, see <http://www.gnu.org/licenses/>.
20 namespace Org.Lwes.ESF
22 using System;
23 using System.Collections.Generic;
24 using System.IO;
26 /// <summary>
27 /// Utility class for parsing ESF files.
28 /// </summary>
29 public class EsfParser
#region Fields

/// <summary>
/// The characters this parser treats as line terminators: carriage return,
/// line feed, U+2028 and U+2029.
/// </summary>
public static readonly char[] LineTerminatorChars = { '\r', '\n', '\u2028', '\u2029' };

/// <summary>
/// ESF type names. The parser indexes this table with a <c>TypeToken</c>
/// value cast to <c>int</c>.
/// </summary>
public static readonly string[] TypeTokenNameMap = new string[]
{
    "undefined", // -
    "uint16",    // 0x01 - UINT16
    "int16",     // 0x02 - INT16
    "uint32",    // 0x03 - UINT32
    "int32",     // 0x04 - INT32
    "string",    // 0x05 - STRING
    "ip_addr",   // 0x06 - IPADDR
    "int64",     // 0x07 - INT64
    "uint64",    // 0x08 - UINT64
    "boolean"    // 0x09 - BOOLEAN
};

/// <summary>
/// The colon character.
/// </summary>
public const char Colon = ':';

/// <summary>
/// The digit nine.
/// </summary>
public const char DigitNine = '9';

/// <summary>
/// The digit zero.
/// </summary>
public const char DigitZero = '0';

/// <summary>
/// The uppercase latin letter A.
/// </summary>
public const char LatinCapitalLetterA = 'A';

/// <summary>
/// The uppercase latin letter Z.
/// </summary>
public const char LatinCapitalLetterZ = 'Z';

/// <summary>
/// The lowercase latin letter a.
/// </summary>
public const char LatinSmallLetterA = 'a';

/// <summary>
/// The lowercase latin letter z.
/// </summary>
public const char LatinSmallLetterZ = 'z';

/// <summary>
/// The left curly bracket, which opens an attribute list.
/// </summary>
public const char LeftCurlyBracket = '{';

/// <summary>
/// The low line (underscore).
/// </summary>
public const char LowLine = '_';

/// <summary>
/// The number sign, which starts a single-line comment.
/// </summary>
public const char NumberSign = '#';

/// <summary>
/// The right curly bracket, which closes an attribute list.
/// </summary>
public const char RightCurlyBracket = '}';

/// <summary>
/// The semicolon, which ends an attribute definition.
/// </summary>
public const char SemiColon = ';';

/// <summary>
/// The NUL character (U+0000).
/// </summary>
private const char NoCharacter = '\u0000';

#endregion Fields
118 #region Methods
/// <summary>
/// Consumes one expected character: verifies that the character under the
/// cursor equals <paramref name="ch"/> and then moves the cursor forward by one.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; incremented when the character matches</param>
/// <param name="ch">the character that must be present at the cursor</param>
/// <returns>the matched character, as read from <paramref name="input"/></returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative or at/after the end of the input.</exception>
/// <exception cref="ParseException">thrown if the character at the cursor is not <paramref name="ch"/>.</exception>
public static char ExpectChar(char[] input, ref Cursor cursor, char ch)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    int position = cursor;
    if (position < 0 || position >= input.Length)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    char found = input[position];
    if (found != ch)
    {
        throw new ParseException(cursor, ch.ToString());
    }

    // Only a successful match moves the caller's cursor.
    cursor++;
    return found;
}
/// <summary>
/// String convenience overload: converts <paramref name="sequence"/> to a
/// character array and delegates to the <c>char[]</c> overload.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; advanced by the <c>char[]</c> overload on success</param>
/// <param name="sequence">the text expected at the cursor</param>
/// <param name="numAlreadyValidated">how many leading characters of the sequence the caller has already checked</param>
/// <returns>the expected text, rebuilt as a new string, when the <c>char[]</c>
/// overload accepts the input; otherwise that overload's exception propagates.</returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="sequence"/> is null.</exception>
public static string ExpectCharSequence(char[] input, ref Cursor cursor, string sequence, int numAlreadyValidated)
{
    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    char[] expected = sequence.ToCharArray();
    char[] matched = ExpectCharSequence(input, ref cursor, expected, numAlreadyValidated);
    return new String(matched);
}
/// <summary>
/// Verifies that <paramref name="sequence"/> appears in the input starting at the
/// cursor and advances the cursor past it.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor, positioned at the first character of the
/// expected sequence; assigned only when the whole sequence is accepted</param>
/// <param name="sequence">the expected sequence of characters</param>
/// <param name="numAlreadyValidated">number of leading characters of
/// <paramref name="sequence"/> the caller has already checked; these are not compared again</param>
/// <returns><paramref name="sequence"/> itself when the expected characters are present.</returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> or
/// <paramref name="sequence"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is outside the input or
/// <paramref name="numAlreadyValidated"/> is negative.</exception>
/// <exception cref="ParseException">thrown if the input is too short to hold the sequence or a
/// character does not match.</exception>
public static char[] ExpectCharSequence(char[] input, ref Cursor cursor, char[] sequence, int numAlreadyValidated)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    if (cursor < 0 || input.Length <= cursor)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    if (numAlreadyValidated < 0)
    {
        // Report the parameter that is actually at fault.
        throw new ArgumentOutOfRangeException("numAlreadyValidated", "Number of already validated characters cannot be negative");
    }

    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    // Check the length before any path moves the cursor, so the cursor can
    // never be advanced beyond the end of the input.
    int len = sequence.Length;
    if (input.Length < cursor + len)
    {
        throw new ParseException(String.Concat("Expected '", new String(sequence), "'"), cursor);
    }

    // The caller claims to have validated more characters than the sequence
    // holds: nothing is left to compare, just step over the sequence.
    if (numAlreadyValidated > len)
    {
        cursor += len;
        return sequence;
    }

    // Compare only the characters that have not been validated yet.
    Cursor c = new Cursor(cursor + numAlreadyValidated);
    for (int i = numAlreadyValidated; i < len; i++, c++)
    {
        if (input[c] != sequence[i])
        {
            throw new ParseException(String.Concat("Expected '", new String(sequence), "'"), cursor);
        }
    }

    cursor = c;
    return sequence;
}
/// <summary>
/// Requires a line terminator at the cursor and moves the cursor past it.
/// A carriage return immediately followed by a line feed is consumed as a
/// single terminator.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; replaced with the result of
/// <c>Cursor.Newline</c> on success</param>
public static void ExpectSkipLineTerminator(char[] input, ref Cursor cursor)
{
    // PeekExpectChar validates the arguments and throws when the character
    // at the cursor is not one of LineTerminatorChars.
    char terminator = PeekExpectChar(input, cursor, LineTerminatorChars);

    // A "\r\n" pair counts as one line break spanning two characters.
    bool isCarriageReturnLineFeed = (terminator == '\r')
        && input.Length > (cursor + 1)
        && (input[cursor + 1] == '\n');

    cursor = isCarriageReturnLineFeed
        ? cursor.Newline(2)
        : cursor.Newline();
}
/// <summary>
/// Recognizes an ESF type name ("uint16", "uint32", "uint64", "int16", "int32",
/// "int64", "ip_addr", "string" or "boolean") starting at the cursor, advances the
/// cursor past the name and returns the corresponding token.
/// </summary>
/// <remarks>
/// Only the characters of the type name are examined; whatever follows the name
/// is left for the caller to validate.
/// </remarks>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; assigned only after a complete name has been matched</param>
/// <returns>the ESF type token located at the cursor</returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative or at/after the end of the input.</exception>
/// <exception cref="ParseException">thrown if no ESF type name starts at the cursor.</exception>
public static TypeToken ExpectTypeToken(char[] input, ref Cursor cursor)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    if (cursor < 0
        || input.Length <= cursor)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    // Scan with a separate cursor variable; the caller's cursor is assigned
    // only once a full type name has been recognized.
    Cursor c = cursor;
    int rmn = input.Length - c;

    // The shortest type names ("int16", "int32", "int64") are five characters long.
    if (rmn < 5)
    {
        throw new ParseException("Expected an ESF type name", cursor);
    }

    switch (input[c])
    {
        case 'u':
            // "uint16", "uint32" and "uint64" all need six characters. Without
            // this guard a five-character remainder such as "uint1" makes the
            // final ExpectChar run past the end of the input and surface as an
            // ArgumentOutOfRangeException instead of a ParseException.
            if (rmn < 6)
            {
                break;
            }

            c++;
            if (input[c] == 'i')
            {
                c++;
                EsfParser.ExpectCharSequence(input, ref c, "nt", 0);
                if (input[c] == '1')
                { // can only be uint16
                    c++;
                    EsfParser.ExpectChar(input, ref c, '6');
                    cursor = c;
                    return TypeToken.UINT16;
                }
                else if (input[c] == '3')
                { // can only be uint32
                    c++;
                    EsfParser.ExpectChar(input, ref c, '2');
                    cursor = c;
                    return TypeToken.UINT32;
                }
                else if (input[c] == '6')
                { // can only be uint64
                    c++;
                    EsfParser.ExpectChar(input, ref c, '4');
                    cursor = c;
                    return TypeToken.UINT64;
                }
            }
            break;

        case 'i':
            c++;
            if (input[c] == 'n')
            {
                c++;
                EsfParser.ExpectChar(input, ref c, 't');
                if (input[c] == '1')
                { // can only be int16
                    c++;
                    EsfParser.ExpectChar(input, ref c, '6');
                    cursor = c;
                    return TypeToken.INT16;
                }
                else if (input[c] == '3')
                { // can only be int32
                    c++;
                    EsfParser.ExpectChar(input, ref c, '2');
                    cursor = c;
                    return TypeToken.INT32;
                }
                else if (input[c] == '6')
                { // can only be int64
                    c++;
                    EsfParser.ExpectChar(input, ref c, '4');
                    cursor = c;
                    return TypeToken.INT64;
                }
            }
            else if (input[c] == 'p')
            { // can only be ip_addr; the leading "ip" has already been checked
                EsfParser.ExpectCharSequence(input, ref cursor, TypeTokenNameMap[(int)TypeToken.IP_ADDR], 2);
                return TypeToken.IP_ADDR;
            }
            break;

        case 's':
            // can only be string; the leading 's' has already been checked
            EsfParser.ExpectCharSequence(input, ref cursor, TypeTokenNameMap[(int)TypeToken.STRING], 1);
            return TypeToken.STRING;

        case 'b':
            // can only be boolean; the leading 'b' has already been checked
            EsfParser.ExpectCharSequence(input, ref cursor, TypeTokenNameMap[(int)TypeToken.BOOLEAN], 1);
            return TypeToken.BOOLEAN;
    }

    throw new ParseException("Expected an ESF type name", cursor);
}
/// <summary>
/// Reads the run of ESF word characters that starts at the cursor, returns it
/// as a string and leaves the cursor on the first character after the word.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; assigned the position just past the word on success</param>
/// <returns>the ESF word found at the cursor</returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative or at/after the end of the input.</exception>
/// <exception cref="ParseException">thrown if no word character is present at the cursor.</exception>
public static string ExpectWord(char[] input, ref Cursor cursor)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    if (cursor < 0
        || input.Length <= cursor)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    int start = cursor;

    // Walk a copy of the cursor forward over every word character.
    Cursor scan = new Cursor(cursor);
    while (scan < input.Length)
    {
        if (!IsWordCharacter(input[scan]))
        {
            break;
        }
        scan++;
    }

    // Nothing consumed means there is no word here.
    if (scan == cursor)
    {
        throw new ParseException("Expected either an EVENTWORD or an ATTRIBUTEWORD at the cursor", cursor);
    }

    cursor = scan;
    return new String(input, start, cursor.Offset - start);
}
/// <summary>
/// Tells whether a character is one of the four line terminators recognized
/// by the parser.
/// </summary>
/// <param name="ch">the character to classify</param>
/// <returns><em>true</em> for carriage return, line feed, U+2028 or U+2029;
/// otherwise <em>false</em></returns>
public static bool IsLineTerminator(char ch)
{
    switch (ch)
    {
        case '\r':     // CarriageReturn
        case '\n':     // LineFeed
        case '\u2028': // LineSeparator
        case '\u2029': // ParagraphSeparator
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Determines whether a character may appear in an ESF word: an ASCII
/// letter, an ASCII digit, the colon or the low line (underscore).
/// </summary>
/// <param name="c">the character to classify</param>
/// <returns><em>true</em> if the character is a legal word character;
/// otherwise <em>false</em></returns>
public static bool IsWordCharacter(char c)
{
    // Tests are ordered by expected frequency: lowercase letters first,
    // then uppercase letters, then digits (and the colon), then the low line.
    if (c >= LatinSmallLetterA && c <= LatinSmallLetterZ)
    {
        return true;
    }

    if (c >= LatinCapitalLetterA && c <= LatinCapitalLetterZ)
    {
        return true;
    }

    // '0'..'9' and ':' are contiguous (\u0030..\u003A), so a single range
    // test covers the digits and the colon.
    if (c >= DigitZero && c <= Colon)
    {
        return true;
    }

    return c == LowLine;
}
/// <summary>
/// Checks that the character under the cursor equals <paramref name="ch"/>
/// without moving the cursor.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; not modified</param>
/// <param name="ch">the character that must be present at the cursor</param>
/// <returns>the matched character, as read from <paramref name="input"/></returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative or at/after the end of the input.</exception>
/// <exception cref="ParseException">thrown if the character at the cursor is not <paramref name="ch"/>.</exception>
public static char PeekExpectChar(char[] input, Cursor cursor, char ch)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    int position = cursor;
    if (position < 0 || position >= input.Length)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    char found = input[position];
    if (found != ch)
    {
        throw new ParseException(cursor, ch.ToString());
    }

    return found;
}
/// <summary>
/// Checks that the character under the cursor is one of the characters in
/// <paramref name="chars"/> without moving the cursor.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; not modified</param>
/// <param name="chars">the set of acceptable characters</param>
/// <returns>the character at the cursor (taken from the input) when it is one
/// of the acceptable characters</returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> or
/// <paramref name="chars"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative or at/after the end of the input.</exception>
/// <exception cref="ParseException">thrown if the character at the cursor is not in
/// <paramref name="chars"/>.</exception>
public static char PeekExpectChar(char[] input, Cursor cursor, char[] chars)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Reject a missing character set explicitly instead of letting it
    // surface as a NullReferenceException from the search below.
    if (chars == null)
    {
        throw new ArgumentNullException("chars");
    }

    if (cursor < 0
        || input.Length <= cursor)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    char found = input[cursor];
    if (Array.IndexOf(chars, found) >= 0)
    {
        return found;
    }

    // NOTE(review): the message mentions newlines although this overload accepts
    // any character set; the text is kept unchanged in case callers depend on it.
    throw new ParseException("Expected newline characters", cursor);
}
/// <summary>
/// Moves the cursor forward until it rests on a line terminator or reaches
/// the end of the input, whichever comes first. The terminator itself is
/// not consumed.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; must start inside the input</param>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative or at/after the end of the input.</exception>
public static void SkipToLineTerminator(char[] input, ref Cursor cursor)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    int position = cursor;
    if (position < 0 || position >= input.Length)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    while (cursor < input.Length)
    {
        if (IsLineTerminator(input[cursor]))
        {
            return;
        }
        cursor++;
    }
}
/// <summary>
/// Advances the cursor past any run of whitespace, line terminators and
/// '#' single-line comments. The cursor stops on the first character that
/// is none of these, or at the end of the input.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <param name="cursor">the parse cursor; a cursor at or beyond the end of the
/// input is left unchanged</param>
/// <exception cref="ArgumentNullException">thrown if <paramref name="input"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown if the cursor is negative.</exception>
public static void SkipWhitespaceAndComments(char[] input, ref Cursor cursor)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    if (cursor < 0)
    {
        throw new ArgumentOutOfRangeException("cursor", "Cursor out of input range");
    }

    // Exactly one character is classified per iteration, and the bounds are
    // re-checked before every read. Input that ends in whitespace (for example
    // a trailing newline) therefore ends the loop instead of indexing past the
    // end of the array.
    while (cursor < input.Length)
    {
        char current = input[cursor];

        if (Char.IsWhiteSpace(current))
        {
            if (IsLineTerminator(current))
            {
                // Line breaks go through ExpectSkipLineTerminator, which
                // consumes "\r\n" as a single terminator.
                ExpectSkipLineTerminator(input, ref cursor);
            }
            else
            {
                cursor++;
            }
        }
        else if (current == NumberSign)
        {
            // A comment runs up to (but not including) the next line terminator.
            SkipToLineTerminator(input, ref cursor);
        }
        else
        {
            // Done skipping
            return;
        }
    }
}
/// <summary>
/// Translates an ESF type name into the byte value of its type token.
/// </summary>
/// <remarks>
/// The name is recognized by <c>ExpectTypeToken</c>, which matches from the
/// first character; this method does not check for characters after the name.
/// </remarks>
/// <param name="tokenName">ESF type name</param>
/// <returns>the byte value of the matching token, or the byte value of
/// <c>TypeToken.UNDEFINED</c> when <c>ExpectTypeToken</c> raises a ParseException</returns>
/// <exception cref="ArgumentNullException">thrown if <paramref name="tokenName"/> is null.</exception>
/// <exception cref="ArgumentException">thrown if <paramref name="tokenName"/> is shorter than five characters.</exception>
public static byte TokenNameToByte(string tokenName)
{
    if (tokenName == null)
    {
        throw new ArgumentNullException("tokenName");
    }

    if (tokenName.Length < 5)
    {
        throw new ArgumentException("not a valid token name");
    }

    Cursor start = new Cursor();
    TypeToken token;
    try
    {
        token = EsfParser.ExpectTypeToken(tokenName.ToCharArray(), ref start);
    }
    catch (ParseException)
    {
        // An unrecognized name maps to the undefined token instead of failing.
        token = TypeToken.UNDEFINED;
    }

    return (byte)token;
}
/// <summary>
/// Reads the whole input stream as text and parses the event templates it contains.
/// </summary>
/// <remarks>
/// The stream is wrapped in a <see cref="StreamReader"/> that is disposed before
/// parsing begins, which also closes <paramref name="inputStream"/>.
/// </remarks>
/// <param name="inputStream">stream containing ESF text</param>
/// <returns>the event templates parsed from the stream</returns>
public IEnumerable<EventTemplate> ParseEventTemplates(Stream inputStream)
{
    string text;
    using (StreamReader reader = new StreamReader(inputStream))
    {
        text = reader.ReadToEnd();
    }

    return ParseEventTemplates(text.ToCharArray());
}
/// <summary>
/// Parses every event template found in the given characters. Whitespace and
/// comments between events are skipped; each event is read by
/// <c>EventTemplate.ExpectEvent</c>.
/// </summary>
/// <param name="input">the characters being parsed</param>
/// <returns>the parsed event templates, in input order</returns>
private IEnumerable<EventTemplate> ParseEventTemplates(char[] input)
{
    List<EventTemplate> templates = new List<EventTemplate>();
    Cursor position = new Cursor();

    while (position < input.Length)
    {
        EsfParser.SkipWhitespaceAndComments(input, ref position);

        // Only whitespace/comments were left: nothing more to parse.
        if (!(position < input.Length))
        {
            break;
        }

        templates.Add(EventTemplate.ExpectEvent(input, ref position));
    }

    return templates;
}
555 #endregion Methods