From 6483b61fb5f9548539912bf267c2fd01a25ba905 Mon Sep 17 00:00:00 2001 From: ketmar Date: Thu, 18 Aug 2011 15:25:15 +0300 Subject: [PATCH] lexer slightly relaxed: '{', '}' and ';' now delimiters --- src/mkjambase.c | 6 ++ src/scan.c | 187 +++++++++++++++++++++++++++++++------------------------- 2 files changed, 110 insertions(+), 83 deletions(-) diff --git a/src/mkjambase.c b/src/mkjambase.c index 8114186..cefb1c0 100644 --- a/src/mkjambase.c +++ b/src/mkjambase.c @@ -25,6 +25,8 @@ #include #include +#define COMPACT + static void fatal (const char *msg) { fprintf(stdout, "FATAL: %s\n", msg); @@ -72,10 +74,14 @@ int main (int argc, char **argv, char **envp) { // while (fgets(buf, sizeof(buf), fin)) { if (doDotC) { +#ifdef COMPACT if (!strncmp(buf, "#DONT_TOUCH", 11)) { dontStrip = !dontStrip; continue; } +#else + dontStrip = 1; +#endif char *p = buf; /* strip leading whitespace */ if (!dontStrip) { diff --git a/src/scan.c b/src/scan.c index f52ea36..ca9d3dd 100644 --- a/src/scan.c +++ b/src/scan.c @@ -34,6 +34,7 @@ const struct keyword { {0,0} }; + struct include { struct include *next; /* next serial include file */ const char *string; /* pointer into current line */ @@ -50,7 +51,8 @@ static int scanmode = SCAN_NORMAL; static int anyerrors = 0; static char *symdump (YYSTYPE *s); -# define BIGGEST_TOKEN 10240 /* no single token can be larger */ +/* no single token can be larger */ +#define BIGGEST_TOKEN (10240) /* @@ -75,7 +77,7 @@ int yyanyerrors (void) { void yyfparse (const char *s) { struct include *i = (struct include *)malloc(sizeof(*i)); - /* Push this onto the incp chain. */ + /* push this onto the incp chain */ i->string = ""; i->strings = 0; i->file = 0; @@ -83,7 +85,7 @@ void yyfparse (const char *s) { i->line = 0; i->next = incp; incp = i; - /* If the filename is "+", it means use the internal jambase. */ + /* if the filename is "+", it means use the internal jambase */ if (!strcmp(s, "+")) i->strings = jambase; } @@ -91,44 +93,41 @@ void yyfparse (const char *s) { /* * yyline() - read new line and return first character * - * Fabricates a continuous stream of characters across include files, - * returning EOF at the bitter end. + * fabricates a continuous stream of characters across include files, returning EOF at the bitter end */ int yyline (void) { struct include *i = incp; - + // if (!incp) return EOF; - /* Once we start reading from the input stream, we reset the */ - /* include insertion point so that the next include file becomes */ - /* the head of the list. */ - - /* If there is more data in this line, return it. */ + /* once we start reading from the input stream, we reset the + * include insertion point so that the next include file becomes + * the head of the list */ + /* if there is more data in this line, return it */ if (*i->string) return *i->string++; - /* If we're reading from an internal string list, go to the */ - /* next string. */ + /* if we're reading from an internal string list, go to the next string */ if (i->strings) { - if(!*i->strings) goto next; - i->line++; + if (!*i->strings) goto next; + ++i->line; i->string = *(i->strings++); return *i->string++; } - /* If necessary, open the file */ + /* if necessary, open the file */ if (!i->file) { FILE *f = stdin; if (strcmp(i->fname, "-") && !(f = fopen(i->fname, "r"))) perror(i->fname); i->file = f; } - /* If there's another line in this file, start it. */ + /* if there's another line in this file, start it */ if (i->file && fgets(i->buf, sizeof(i->buf), i->file)) { - i->line++; + ++i->line; i->string = i->buf; return *i->string++; } next: - /* This include is done. */ - /* Free it up and return EOF so yyparse() returns to parse_file(). */ + /* this include is done */ + /* free it up and return EOF so yyparse() returns to parse_file() */ incp = i->next; - /* Close file, free name */ + /* close file, free name */ if (i->file && i->file != stdin) fclose(i->file); freestr(i->fname); free((char *)i); @@ -147,61 +146,94 @@ next: * yychar() returns a continuous stream of characters, until it hits * the EOF of the current include file. */ +#define yychar() (*incp->string ? *incp->string++ : yyline()) +#define yyprev() (incp->string--) + + +/* eat white space */ +static int skipSpaces (int c) { + for (;;) { + /* skip past white space */ + while (c != EOF && isspace(c)) c = yychar(); + /* not a comment? swallow up comment line */ + if (c != '#') break; + while ((c = yychar()) != EOF && c != '\n') ; + } + return c; +} -# define yychar() (*incp->string ? *incp->string++ : yyline()) -# define yyprev() (incp->string--) int yylex (void) { int c; char buf[BIGGEST_TOKEN]; char *b = buf; - + // if (!incp) goto eof; - /* Get first character (whitespace or of token) */ + /* get first character (whitespace or of token) */ c = yychar(); if (scanmode == SCAN_STRING) { - /* If scanning for a string (action's {}'s), look for the */ - /* closing brace. We handle matching braces, if they match! */ + /* if scanning for a string (action's {}'s), look for the closing brace */ + /* we handle matching braces, if they match! */ int nest = 1; - + // while (c != EOF && b < buf+sizeof(buf)) { - if (c == '{') nest++; + if (c == '{') ++nest; if (c == '}' && !--nest) break; *b++ = c; c = yychar(); } - /* We ate the ending brace -- regurgitate it. */ + /* we ate the ending brace -- regurgitate it */ if (c != EOF) yyprev(); - /* Check obvious errors. */ - if (b == buf+sizeof(buf)) { - yyerror("action block too big"); - goto eof; - } - if (nest) { - yyerror("unmatched {} in action block"); - goto eof; - } + /* check obvious errors */ + if (b == buf+sizeof(buf)) { yyerror("action block too big"); goto eof; } + if (nest) { yyerror("unmatched {} in action block"); goto eof; } *b = 0; yylval.type = STRING; yylval.string = newstr(buf); } else { char *b = buf; const struct keyword *k; - int inquote = 0; - int notkeyword; - /* Eat white space */ - for (;;) { - /* Skip past white space */ - while (c != EOF && isspace(c)) c = yychar(); - /* Not a comment? Swallow up comment line. */ - if (c != '#') break; - while ((c = yychar()) != EOF && c != '\n') ; - } - /* c now points to the first character of a token. */ + int inquote = 0, notkeyword = 0; + // + c = skipSpaces(c); + /* c now points to the first character of a token */ if (c == EOF) goto eof; - /* While scanning the word, disqualify it for (expensive) */ - /* keyword lookup when we can: $anything, "anything", \anything */ - notkeyword = c == '$'; + //printf(":'%c'\n", c); +#if 0 + if (!isalpha(c) && c != '$' && c != '_' && c != '"' && c != '\'') { + const struct keyword *kgood = NULL; + /* special chars are delimiters */ + while (c != EOF) { + *b++ = c; + *b = 0; + for (k = keywords; k->word != NULL; ++k) if (!isalpha(k->word[0]) && strcmp(buf, k->word) == 0) break; + if (k->word != NULL) { + /* good keyword */ + kgood = k; + c = yychar(); + continue; + } + /* bad keyword */ + break; + } + if (c != EOF) --b; /* remove last char from token buffer */ + if (kgood != NULL) { + /* ok, we got it */ + printf("![%s]\n", buf); + if (c != EOF) yyprev(); + goto lexdone; + } + } + /* bad luck, try it another way */ +#else + if (c == '{' || c == '}' || c == ';') { + *b++ = c; + goto lexdone; + } +#endif + /* while scanning the word, disqualify it for (expensive) + * keyword lookup when we can: $anything, "anything", \anything */ + notkeyword = (c == '$'); /* look for white space to delimit word */ /* "'s get stripped but preserve white space */ /* \ protects next character */ @@ -210,6 +242,10 @@ int yylex (void) { /* begin or end " */ inquote = !inquote; notkeyword = 1; + } else if (c == '{' || c == '}' || c == ';') { + /* k8: allow specials to work as delimiters */ + if (!inquote) break; + *b++ = c; } else if (c != '\\') { /* normal char */ *b++ = c; @@ -223,27 +259,22 @@ int yylex (void) { } c = yychar(); } - /* Check obvious errors. */ - if (b == buf+sizeof(buf)) { - yyerror("string too big"); - goto eof; - } - if (inquote) { - yyerror("unmatched \" in string"); - goto eof; - } - /* We looked ahead a character - back up. */ + /* we looked ahead a character - back up */ if (c != EOF) yyprev(); +lexdone: + /* check obvious errors */ + if (b == buf+sizeof(buf)) { yyerror("string too big"); goto eof; } + if (inquote) { yyerror("unmatched \" in string"); goto eof; } /* scan token table */ /* don't scan if it's obviously not a keyword or if its */ /* an alphabetic when were looking for punctuation */ *b = 0; yylval.type = ARG; if (!notkeyword && !(isalpha(*buf) && scanmode == SCAN_PUNCT)) { - for (k = keywords; k->word; k++) { - if (*buf == *k->word && !strcmp(k->word, buf)) { + for (k = keywords; k->word; ++k) { + if (*buf == *k->word && strcmp(k->word, buf) == 0) { yylval.type = k->type; - yylval.string = k->word; /* used by symdump */ + yylval.string = k->word; /* used by symdump */ break; } } @@ -260,23 +291,13 @@ eof: static char *symdump (YYSTYPE *s) { static char buf[BIGGEST_TOKEN+20]; - + // switch (s->type) { - case EOF: - sprintf(buf, "EOF"); - break; - case 0: - sprintf(buf, "unknown symbol %s", s->string); - break; - case ARG: - sprintf(buf, "argument %s", s->string); - break; - case STRING: - sprintf(buf, "string \"%s\"", s->string); - break; - default: - sprintf(buf, "keyword %s", s->string); - break; - } + case EOF: sprintf(buf, "EOF"); break; + case 0: sprintf(buf, "unknown symbol %s", s->string); break; + case ARG: sprintf(buf, "argument %s", s->string); break; + case STRING: sprintf(buf, "string \"%s\"", s->string); break; + default: sprintf(buf, "keyword %s", s->string); break; + } return buf; } -- 2.11.4.GIT