From 6483b61fb5f9548539912bf267c2fd01a25ba905 Mon Sep 17 00:00:00 2001
From: ketmar <ketmar@ketmar.no-ip.org>
Date: Thu, 18 Aug 2011 15:25:15 +0300
Subject: [PATCH] lexer slightly relaxed: '{', '}' and ';' now delimiters

---
 src/mkjambase.c |   6 ++
 src/scan.c      | 187 +++++++++++++++++++++++++++++++-------------------------
 2 files changed, 110 insertions(+), 83 deletions(-)

diff --git a/src/mkjambase.c b/src/mkjambase.c
index 8114186..cefb1c0 100644
--- a/src/mkjambase.c
+++ b/src/mkjambase.c
@@ -25,6 +25,8 @@
 #include <string.h>
 #include <unistd.h>
 
+#define COMPACT
+
 
 static void fatal (const char *msg) {
   fprintf(stdout, "FATAL: %s\n", msg);
@@ -72,10 +74,14 @@ int main (int argc, char **argv, char **envp) {
     //
     while (fgets(buf, sizeof(buf), fin)) {
       if (doDotC) {
+#ifdef COMPACT
         if (!strncmp(buf, "#DONT_TOUCH", 11)) {
           dontStrip = !dontStrip;
           continue;
         }
+#else
+        dontStrip = 1;
+#endif
         char *p = buf;
         /* strip leading whitespace */
         if (!dontStrip) {
diff --git a/src/scan.c b/src/scan.c
index f52ea36..ca9d3dd 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -34,6 +34,7 @@ const struct keyword {
   {0,0}
 };
 
+
 struct include {
   struct include *next; /* next serial include file */
   const char *string;  /* pointer into current line */
@@ -50,7 +51,8 @@ static int scanmode = SCAN_NORMAL;
 static int anyerrors = 0;
 static char *symdump (YYSTYPE *s);
 
-# define BIGGEST_TOKEN 10240  /* no single token can be larger */
+/* no single token can be larger */
+#define BIGGEST_TOKEN  (10240)
 
 
 /*
@@ -75,7 +77,7 @@ int yyanyerrors (void) {
 
 void yyfparse (const char *s) {
   struct include *i = (struct include *)malloc(sizeof(*i));
-  /* Push this onto the incp chain. */
+  /* push this onto the incp chain */
   i->string = "";
   i->strings = 0;
   i->file = 0;
@@ -83,7 +85,7 @@ void yyfparse (const char *s) {
   i->line = 0;
   i->next = incp;
   incp = i;
-  /* If the filename is "+", it means use the internal jambase. */
+  /* if the filename is "+", it means use the internal jambase */
   if (!strcmp(s, "+")) i->strings = jambase;
 }
 
@@ -91,44 +93,41 @@ void yyfparse (const char *s) {
 /*
  * yyline() - read new line and return first character
  *
- * Fabricates a continuous stream of characters across include files,
- * returning EOF at the bitter end.
+ * fabricates a continuous stream of characters across include files, returning EOF at the bitter end
  */
 int yyline (void) {
   struct include *i = incp;
-
+  //
   if (!incp) return EOF;
-  /* Once we start reading from the input stream, we reset the */
-  /* include insertion point so that the next include file becomes */
-  /* the head of the list. */
-
-  /* If there is more data in this line, return it. */
+  /* once we start reading from the input stream, we reset the
+   * include insertion point so that the next include file becomes
+   * the head of the list */
+  /* if there is more data in this line, return it */
   if (*i->string) return *i->string++;
-  /* If we're reading from an internal string list, go to the */
-  /* next string. */
+  /* if we're reading from an internal string list, go to the next string */
   if (i->strings) {
-    if(!*i->strings) goto next;
-    i->line++;
+    if (!*i->strings) goto next;
+    ++i->line;
     i->string = *(i->strings++);
     return *i->string++;
   }
-  /* If necessary, open the file */
+  /* if necessary, open the file */
   if (!i->file) {
     FILE *f = stdin;
     if (strcmp(i->fname, "-") && !(f = fopen(i->fname, "r"))) perror(i->fname);
     i->file = f;
   }
-  /* If there's another line in this file, start it. */
+  /* if there's another line in this file, start it */
   if (i->file && fgets(i->buf, sizeof(i->buf), i->file)) {
-    i->line++;
+    ++i->line;
     i->string = i->buf;
     return *i->string++;
   }
 next:
-  /* This include is done.  */
-  /* Free it up and return EOF so yyparse() returns to parse_file(). */
+  /* this include is done */
+  /* free it up and return EOF so yyparse() returns to parse_file() */
   incp = i->next;
-  /* Close file, free name */
+  /* close file, free name */
   if (i->file && i->file != stdin) fclose(i->file);
   freestr(i->fname);
   free((char *)i);
@@ -147,61 +146,94 @@ next:
  * yychar() returns a continuous stream of characters, until it hits
  * the EOF of the current include file.
  */
+#define yychar()  (*incp->string ? *incp->string++ : yyline())
+#define yyprev()  (incp->string--)
+
+
+/* eat white space and '#' comment lines starting at c; returns the first token character or EOF */
+static int skipSpaces (int c) {
+  for (;;) {
+    /* skip past white space */
+    while (c != EOF && isspace(c)) c = yychar();
+    /* not a comment? then we are done; otherwise swallow up the comment line */
+    if (c != '#') break;
+    while ((c = yychar()) != EOF && c != '\n') ;
+  }
+  return c;
+}
 
-# define yychar() (*incp->string ? *incp->string++ : yyline())
-# define yyprev() (incp->string--)
 
 int yylex (void) {
   int c;
   char buf[BIGGEST_TOKEN];
   char *b = buf;
-
+  //
   if (!incp) goto eof;
-  /* Get first character (whitespace or of token) */
+  /* get first character (whitespace or of token) */
   c = yychar();
   if (scanmode == SCAN_STRING) {
-    /* If scanning for a string (action's {}'s), look for the */
-    /* closing brace.  We handle matching braces, if they match! */
+    /* if scanning for a string (action's {}'s), look for the closing brace */
+    /* we handle matching braces, if they match! */
     int nest = 1;
-
+    //
     while (c != EOF && b < buf+sizeof(buf)) {
-      if (c == '{') nest++;
+      if (c == '{') ++nest;
       if (c == '}' && !--nest) break;
       *b++ = c;
       c = yychar();
     }
-    /* We ate the ending brace -- regurgitate it. */
+    /* we ate the ending brace -- regurgitate it */
     if (c != EOF) yyprev();
-    /* Check obvious errors. */
-    if (b == buf+sizeof(buf)) {
-      yyerror("action block too big");
-      goto eof;
-    }
-    if (nest) {
-      yyerror("unmatched {} in action block");
-      goto eof;
-    }
+    /* check obvious errors */
+    if (b == buf+sizeof(buf)) { yyerror("action block too big"); goto eof; }
+    if (nest) { yyerror("unmatched {} in action block"); goto eof; }
     *b = 0;
     yylval.type = STRING;
     yylval.string = newstr(buf);
   } else {
     char *b = buf;
     const struct keyword *k;
-    int inquote = 0;
-    int notkeyword;
-    /* Eat white space */
-    for (;;) {
-      /* Skip past white space */
-      while (c != EOF && isspace(c)) c = yychar();
-      /* Not a comment?  Swallow up comment line. */
-      if (c != '#') break;
-      while ((c = yychar()) != EOF && c != '\n') ;
-    }
-    /* c now points to the first character of a token. */
+    int inquote = 0, notkeyword = 0;
+    //
+    c = skipSpaces(c);
+    /* c now holds the first character of a token (or EOF) */
     if (c == EOF) goto eof;
-    /* While scanning the word, disqualify it for (expensive) */
-    /* keyword lookup when we can: $anything, "anything", \anything */
-    notkeyword = c == '$';
+    //printf(":'%c'\n", c);
+#if 0
+    if (!isalpha(c) && c != '$' && c != '_' && c != '"' && c != '\'') {
+      const struct keyword *kgood = NULL;
+      /* special chars are delimiters */
+      while (c != EOF) {
+        *b++ = c;
+        *b = 0;
+        for (k = keywords; k->word != NULL; ++k) if (!isalpha(k->word[0]) && strcmp(buf, k->word) == 0) break;
+        if (k->word != NULL) {
+          /* good keyword */
+          kgood = k;
+          c = yychar();
+          continue;
+        }
+        /* bad keyword */
+        break;
+      }
+      if (c != EOF) --b; /* remove last char from token buffer */
+      if (kgood != NULL) {
+        /* ok, we got it */
+        printf("![%s]\n", buf);
+        if (c != EOF) yyprev();
+        goto lexdone;
+      }
+    }
+    /* bad luck, try it another way */
+#else
+    if (c == '{' || c == '}' || c == ';') {
+      *b++ = c;
+      goto lexdone;
+    }
+#endif
+    /* while scanning the word, disqualify it for (expensive)
+     * keyword lookup when we can: $anything, "anything", \anything */
+    notkeyword = (c == '$');
     /* look for white space to delimit word */
     /* "'s get stripped but preserve white space */
     /* \ protects next character */
@@ -210,6 +242,10 @@ int yylex (void) {
         /* begin or end " */
         inquote = !inquote;
         notkeyword = 1;
+      } else if (c == '{' || c == '}' || c == ';') {
+        /* k8: outside quotes '{', '}' and ';' end the word; inside quotes they are ordinary chars */
+        if (!inquote) break;
+        *b++ = c;
       } else if (c != '\\') {
         /* normal char */
         *b++ = c;
@@ -223,27 +259,22 @@ int yylex (void) {
       }
       c = yychar();
     }
-    /* Check obvious errors. */
-    if (b == buf+sizeof(buf)) {
-      yyerror("string too big");
-      goto eof;
-    }
-    if (inquote) {
-      yyerror("unmatched \" in string");
-      goto eof;
-    }
-    /* We looked ahead a character - back up. */
+    /* we looked ahead a character - back up */
     if (c != EOF) yyprev();
+lexdone:
+    /* check obvious errors */
+    if (b == buf+sizeof(buf)) { yyerror("string too big"); goto eof; }
+    if (inquote) { yyerror("unmatched \" in string"); goto eof; }
     /* scan token table */
     /* don't scan if it's obviously not a keyword or if its */
     /* an alphabetic when were looking for punctuation */
     *b = 0;
     yylval.type = ARG;
     if (!notkeyword && !(isalpha(*buf) && scanmode == SCAN_PUNCT)) {
-      for (k = keywords; k->word; k++) {
-        if (*buf == *k->word && !strcmp(k->word, buf)) {
+      for (k = keywords; k->word; ++k) {
+        if (*buf == *k->word && strcmp(k->word, buf) == 0) {
           yylval.type = k->type;
-          yylval.string = k->word;  /* used by symdump */
+          yylval.string = k->word; /* used by symdump */
           break;
         }
       }
@@ -260,23 +291,13 @@ eof:
 
 static char *symdump (YYSTYPE *s) {
   static char buf[BIGGEST_TOKEN+20];
-
+  //
   switch (s->type) {
-    case EOF:
-      sprintf(buf, "EOF");
-      break;
-    case 0:
-      sprintf(buf, "unknown symbol %s", s->string);
-      break;
-    case ARG:
-      sprintf(buf, "argument %s", s->string);
-      break;
-    case STRING:
-      sprintf(buf, "string \"%s\"", s->string);
-      break;
-    default:
-      sprintf(buf, "keyword %s", s->string);
-     break;
-  } 
+    case EOF: sprintf(buf, "EOF"); break;
+    case 0: sprintf(buf, "unknown symbol %s", s->string); break;
+    case ARG: sprintf(buf, "argument %s", s->string); break;
+    case STRING: sprintf(buf, "string \"%s\"", s->string); break;
+    default: sprintf(buf, "keyword %s", s->string); break;
+  }
   return buf;
 }
-- 
2.11.4.GIT