source/ubiqx/debugparse.c

   1 /* ========================================================================== **
   2  *                                debugparse.c
   3  *
   4  * Copyright (C) 1998 by Christopher R. Hertel
   5  *
   6  * Email: crh@ubiqx.mn.org
   7  *
   8  * -------------------------------------------------------------------------- **
   9  * This module is a very simple parser for Samba debug log files.
  10  * -------------------------------------------------------------------------- **
  11  *
  12  *  This library is free software; you can redistribute it and/or
  13  *  modify it under the terms of the GNU Library General Public
  14  *  License as published by the Free Software Foundation; either
  15  *  version 2 of the License, or (at your option) any later version.
  16  *
  17  *  This library is distributed in the hope that it will be useful,
  18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  *  Library General Public License for more details.
  21  *
  22  *  You should have received a copy of the GNU Library General Public
  23  *  License along with this library; if not, write to the Free
  24  *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  25  *
  26  * -------------------------------------------------------------------------- **
  27  * The important function in this module is dbg_char2token().  The rest is
  28  * basically fluff.  (Potentially useful fluff, but still fluff.)
  29  * ========================================================================== **
  30  */
  31
  32 #include "debugparse.h"
  33
  34 /* -------------------------------------------------------------------------- **
  35  * Constants...
  36  *
  37  *  DBG_BSIZE - This internal constant is used only by dbg_test().  It is the
  38  *          size of the read buffer.  I've tested the function using a
  39  *          DBG_BSIZE value of 2.
  40  */
  41
  42 #define DBG_BSIZE 128
  43
  44 /* -------------------------------------------------------------------------- **
  45  * Functions...
  46  */
  47
  48 char *dbg_token2string( dbg_Token tok )
  49   /* ------------------------------------------------------------------------ **
  50    * Given a token, return a string describing the token.
  51    *
  52    *  Input:  tok - One of the set of dbg_Tokens defined in debugparse.h.
  53    *
  54    *  Output: A string identifying the token.  This is useful for debugging,
  55    *          etc.
  56    *
  57    *  Note:   If the token is not known, this function will return the
  58    *          string "<unknown>".
  59    *
  60    * ------------------------------------------------------------------------ **
  61    */
  62   {
  63   switch( tok )
  64     {
  65     case dbg_null:
  66       return( "null" );
  67     case dbg_ignore:
  68       return( "ignore" );
  69     case dbg_header:
  70       return( "header" );
  71     case dbg_timestamp:
  72       return( "time stamp" );
  73     case dbg_level:
  74       return( "level" );
  75     case dbg_sourcefile:
  76       return( "source file" );
  77     case dbg_function:
  78       return( "function" );
  79     case dbg_lineno:
  80       return( "line number" );
  81     case dbg_message:
  82       return( "message" );
  83     case dbg_eof:
  84       return( "[EOF]" );
  85     }
  86   return( "<unknown>" );
  87   } /* dbg_token2string */
  88
  89 dbg_Token dbg_char2token( dbg_Token *state, int c )
  90   /* ------------------------------------------------------------------------ **
  91    * Parse input one character at a time.
  92    *
  93    *  Input:  state - A pointer to a token variable.  This is used to
  94    *                  maintain the parser state between calls.  For
  95    *                  each input stream, you should set up a separate
  96    *                  state variable and initialize it to dbg_null.
  97    *                  Pass a pointer to it into this function with each
  98    *                  character in the input stream.  See dbg_test()
  99    *                  for an example.
 100    *          c     - The "current" character in the input stream.
 101    *
 102    *  Output: A token.
 103    *          The token value will change when delimiters are found,
 104    *          which indicate a transition between syntactical objects.
 105    *          Possible return values are:
 106    *
 107    *          dbg_null        - The input character was an end-of-line.
 108    *                            This resets the parser to its initial state
 109    *                            in preparation for parsing the next line.
 110    *          dbg_eof         - Same as dbg_null, except that the character
 111    *                            was an end-of-file.
 112    *          dbg_ignore      - Returned for whitespace and delimiters.
 113    *                            These lexical tokens are only of interest
 114    *                            to the parser.
 115    *          dbg_header      - Indicates the start of a header line.  The
 116    *                            input character was '[' and was the first on
 117    *                            the line.
 118    *          dbg_timestamp   - Indicates that the input character was part
 119    *                            of a header timestamp.
 120    *          dbg_level       - Indicates that the input character was part
 121    *                            of the debug-level value in the header.
 122    *          dbg_sourcefile  - Indicates that the input character was part
 123    *                            of the sourcefile name in the header.
 124    *          dbg_function    - Indicates that the input character was part
 125    *                            of the function name in the header.
 126    *          dbg_lineno      - Indicates that the input character was part
 127    *                            of the DEBUG call line number in the header.
 128    *          dbg_message     - Indicates that the input character was part
 129    *                            of the DEBUG message text.
 130    *
 131    * ------------------------------------------------------------------------ **
 132    */
 133   {
 134   /* The terminating characters that we see will greatly depend upon
 135    * how they are read.  For example, if gets() is used instead of
 136    * fgets(), then we will not see newline characters.  A lot also
 137    * depends on the calling function, which may handle terminators
 138    * itself.
 139    *
 140    * '\n', '\0', and EOF are all considered line terminators.  The
 141    * dbg_eof token is sent back if an EOF is encountered.
 142    *
 143    * Warning:  only allow the '\0' character to be sent if you are
 144    *           using gets() to read whole lines (thus replacing '\n'
 145    *           with '\0').  Sending '\0' at the wrong time will mess
 146    *           up the parsing.
 147    */
 148   switch( c )
 149     {
 150     case EOF:
 151       *state = dbg_null;   /* Set state to null (initial state) so */
 152       return( dbg_eof );   /* that we can restart with new input.  */
 153     case '\n':
 154     case '\0':
 155       *state = dbg_null;   /* A newline or eoln resets to the null state. */
 156       return( dbg_null );
 157     }
 158
 159   /* When within the body of the message, only a line terminator
 160    * can cause a change of state.  We've already checked for line
 161    * terminators, so if the current state is dbg_msgtxt, simply
 162    * return that as our current token.
 163    */
 164   if( dbg_message == *state )
 165     return( dbg_message );
 166
 167   /* If we are at the start of a new line, and the input character
 168    * is an opening bracket, then the line is a header line, otherwise
 169    * it's a message body line.
 170    */
 171   if( dbg_null == *state )
 172     {
 173     if( '[' == c )
 174       {
 175       *state = dbg_timestamp;
 176       return( dbg_header );
 177       }
 178     *state = dbg_message;
 179     return( dbg_message );
 180     }
 181
 182   /* We've taken care of terminators, text blocks and new lines.
 183    * The remaining possibilities are all within the header line
 184    * itself.
 185    */
 186
 187   /* Within the header line, whitespace can be ignored *except*
 188    * within the timestamp.
 189    */
 190   if( isspace( c ) )
 191     {
 192     /* Fudge.  The timestamp may contain space characters. */
 193     if( (' ' == c) && (dbg_timestamp == *state) )
 194       return( dbg_timestamp );
 195     /* Otherwise, ignore whitespace. */
 196     return( dbg_ignore );
 197     }
 198
 199   /* Okay, at this point we know we're somewhere in the header.
 200    * Valid header *states* are: dbg_timestamp, dbg_level,
 201    * dbg_sourcefile, dbg_function, and dbg_lineno.
 202    */
 203   switch( c )
 204     {
 205     case ',':
 206       if( dbg_timestamp == *state )
 207         {
 208         *state = dbg_level;
 209         return( dbg_ignore );
 210         }
 211       break;
 212     case ']':
 213       if( dbg_level == *state )
 214         {
 215         *state = dbg_sourcefile;
 216         return( dbg_ignore );
 217         }
 218       break;
 219     case ':':
 220       if( dbg_sourcefile == *state )
 221         {
 222         *state = dbg_function;
 223         return( dbg_ignore );
 224         }
 225       break;
 226     case '(':
 227       if( dbg_function == *state )
 228         {
 229         *state = dbg_lineno;
 230         return( dbg_ignore );
 231         }
 232       break;
 233     case ')':
 234       if( dbg_lineno == *state )
 235         {
 236         *state = dbg_null;
 237         return( dbg_ignore );
 238         }
 239       break;
 240     }
 241
 242   /* If the previous block did not result in a state change, then
 243    * return the current state as the current token.
 244    */
 245   return( *state );
 246   } /* dbg_char2token */
 247
 248 void dbg_test( void )
 249   /* ------------------------------------------------------------------------ **
 250    * Simple test function.
 251    *
 252    *  Input:  none.
 253    *  Output: none.
 254    *  Notes:  This function was used to test dbg_char2token().  It reads a
 255    *          Samba log file from stdin and prints parsing info to stdout.
 256    *          It also serves as a simple example.
 257    *
 258    * ------------------------------------------------------------------------ **
 259    */
 260   {
 261   char bufr[DBG_BSIZE];
 262   int  i;
 263   int  linecount  = 1;
 264   dbg_Token old   = dbg_null,
 265             new   = dbg_null,
 266             state = dbg_null;
 267
 268   while( fgets( bufr, DBG_BSIZE, stdin ) )
 269     {
 270     for( i = 0; bufr[i]; i++ )
 271       {
 272       old = new;
 273       new = dbg_char2token( &state, bufr[i] );
 274       switch( new )
 275         {
 276         case dbg_header:
 277           if( linecount > 1 )
 278             (void)putchar( '\n' );
 279           break;
 280         case dbg_null:
 281           linecount++;
 282           break;
 283         case dbg_ignore:
 284           break;
 285         default:
 286           if( old != new )
 287             (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(new) );
 288           (void)putchar( bufr[i] );
 289         }
 290       }
 291     }
 292   (void)putchar( '\n' );
 293   } /* dbg_test */
 294
 295
 296 /* -------------------------------------------------------------------------- **
 297  * This simple main line can be uncommented and used to test the parser.
 298  */
 299
 300 /*
 301  * int main( void )
 302  *  {
 303  *  dbg_test();
 304  *  return( 0 );
 305  *  }
 306  */
 307
 308 /* ========================================================================== */