From 073de9a10b3b4f8041829bfefad94f05ea2c9154 Mon Sep 17 00:00:00 2001 From: ketmar Date: Fri, 17 Feb 2012 16:52:50 +0200 Subject: [PATCH] better envvar splitter --- src/jam.c | 6 ++-- src/matchglob.c | 89 ++++++++++++++++++++++++++++++++++++++------------------- src/matchglob.h | 2 ++ src/variable.c | 50 ++++++++++++++++++++++++++++++-- src/variable.h | 2 +- 5 files changed, 112 insertions(+), 37 deletions(-) diff --git a/src/jam.c b/src/jam.c index 35a7579..5b63872 100644 --- a/src/jam.c +++ b/src/jam.c @@ -266,15 +266,15 @@ int main (int argc, char **argv, char **arg_environ) { } #endif /* unix */ /* Jam defined variables OS, OSPLAT */ - var_defines(othersyms); + var_defines(othersyms, 1); /* load up environment variables */ - var_defines((const char **)use_environ); + var_defines((const char **)use_environ, 0); /* load up variables set on command line. */ for (n = 0; (s = getoptval(optv, 's', n)) != 0; ++n) { const char *symv[2]; symv[0] = s; symv[1] = 0; - var_defines(symv); + var_defines(symv, 1); } /* add JAMCMDARGS */ { diff --git a/src/matchglob.c b/src/matchglob.c index b9e1f06..852781d 100644 --- a/src/matchglob.c +++ b/src/matchglob.c @@ -25,21 +25,72 @@ * 11/04/02 (seiwald) - const-ing for string literals */ #include "jam.h" +#include "matchglob.h" #define CHECK_BIT(tab, bit) (tab[(bit)/8]&(1<<((bit)%8))) +#define SET_BIT(tab, bit) (tab[(bit)/8] |= (1<<((bit)%8))) + /* bytes used for [chars] in compiled expr */ -#define BITLISTSIZE (16) +#define BITLISTSIZE (32) + + +static inline int casechar (char ch) { + return + (ch >= 'a' && ch <= 'z') ? ch-32 : + (ch >= 'A' && ch <= 'Z') ? ch+32 : + ch; +} + + +static inline int lower (char ch) { + return (ch >= 'A' && ch <= 'Z') ? ch+32 : ch; +} -static void globchars (const char *s, const char *e, char *b); +static inline int samechars (char c0, char c1, int casesens) { + return casesens ? c0==c1 : lower(c0)==lower(c1); +} /* - * matchglob() - match a string against a simple pattern + * globchars() - build a bitlist to check for character group match */ +static void globchars (const char *s, const char *e, unsigned char *b, int casesens) { + int neg = 0, c; + // + memset(b, 0, BITLISTSIZE); + if (*s == '^') ++neg, ++s; + // + while (s < e) { + if (s+2 < e && s[1] == '-') { + for (c = (unsigned char)s[0]; c <= (unsigned char)s[2]; ++c) { + SET_BIT(b, c); + if (!casesens) SET_BIT(b, casechar(c)); + } + s += 3; + } else { + c = (unsigned char)(*s++); + SET_BIT(b, c); + if (!casesens) SET_BIT(b, casechar(c)); + } + } + if (neg) for (c = 0; c < BITLISTSIZE; ++c) b[c] ^= 0xff; + /* don't include \0 in either $[chars] or $[^chars] */ + b[0] &= 0xfe; +} + + int matchglob (const char *pat, const char *str) { - char bitlist[BITLISTSIZE]; + return matchglobex(pat, str, 1); +} + + +/* + * matchglobex() - match a string against a simple pattern + */ +int matchglobex (const char *pat, const char *str, int casesens) { + unsigned char bitlist[BITLISTSIZE]; const char *here; // for (;;) { @@ -56,7 +107,7 @@ int matchglob (const char *pat, const char *str) { do { if (!*pat++) return 1; } while (here == pat || *pat != ']') ; ++pat; /* build character class bitlist */ - globchars(here, pat, bitlist); + globchars(here, pat, bitlist, casesens); if (!CHECK_BIT(bitlist, *(unsigned char *)str)) return 1; ++str; break; @@ -68,7 +119,7 @@ int matchglob (const char *pat, const char *str) { while (str != here) { int r; /* a fast path for the last token in a pattern */ - r = *pat?matchglob(pat, str):*str?-1:0; + r = *pat?matchglobex(pat, str, casesens):*str?-1:0; if (!r) return 0; if (r < 0) return 1; --str; @@ -76,35 +127,13 @@ int matchglob (const char *pat, const char *str) { break; case '\\': /* force literal match of next char */ - if (!*pat || *str++ != *pat++) return 1; + if (!*pat || !samechars(*str++, *pat++, casesens)) return 1; break; default: - if (*str++ != pat[-1]) return 1; + if (!samechars(*str++, pat[-1], casesens)) return 1; break; } } } -/* - * globchars() - build a bitlist to check for character group match - */ -static void globchars (const char *s, const char *e, char *b) { - int neg = 0; - int c; - // - memset(b, '\0', BITLISTSIZE); - if (*s == '^') ++neg, ++s; - while (s < e) { - if (s+2 < e && s[1] == '-') { - for (c = s[0]; c <= s[2]; ++c) b[c/8] |= (1<<(c%8)); - s += 3; - } else { - c = *s++; - b[c/8] |= (1<<(c%8)); - } - } - if (neg) for (c = 0; c < BITLISTSIZE; ++c) b[c] ^= 0377; - /* don't include \0 in either $[chars] or $[^chars] */ - b[0] &= 0376; -} diff --git a/src/matchglob.h b/src/matchglob.h index c687536..0e7a1a0 100644 --- a/src/matchglob.h +++ b/src/matchglob.h @@ -18,7 +18,9 @@ #define JAMH_MATCHGLOB_H +/* returns 0 on success */ extern int matchglob (const char *pat, const char *str); +extern int matchglobex (const char *pat, const char *str, int casesens); #endif diff --git a/src/variable.c b/src/variable.c index d069bcf..ce312d2 100644 --- a/src/variable.c +++ b/src/variable.c @@ -35,6 +35,7 @@ #include "expand.h" #include "hash.h" #include "newstr.h" +#include "matchglob.h" static struct hash *varhash = NULL; @@ -75,12 +76,40 @@ static VARIABLE *var_enter (const char *symbol) { } +/* case-insensitive */ +static inline int globhit (const char *list[], const char *str) { + for (; *list; ++list) { + //fprintf(stderr, "[%s]:[%s]:%d\n", *list, str, matchglobex(*list, str, 0)); + if (matchglobex(*list, str, 0) == 0) return 1; + } + return 0; +} + + /* * var_defines() */ -void var_defines (const char **e) { +void var_defines (const char **e, int dontignore) { + static const char *pathsplits[] = { + "*PATH", + "INCLUDE", + "LIBS", + NULL + }; + static const char *varignores[] = { + "ANT_*", + "BASH*", + "LS_*", /* various options for ls */ + "PROMPT_*", + "PS[0-9]", + "WINDOWPATH", + NULL + }; + // + //fprintf(stderr, "---\n"); for (; *e; ++e) { const char *val; + //fprintf(stderr, "[%s]\n", *e); /* just say "no": windows defines this in the env, but we don't want it to override our notion of OS */ if (!strcmp(*e, "OS=Windows_NT")) continue; /* Just say "no": on Unix, variables can contain function @@ -92,23 +121,38 @@ void var_defines (const char **e) { const char *pp, *p; char split = ' '; char buf[MAXSYM]; + /* get name */ + strncpy(buf, *e, val-*e); + buf[val-*e] = '\0'; + if (!dontignore && globhit(varignores, buf)) { + /* ignore this */ + //fprintf(stderr, "IGN: [%s]\n", buf); + continue; + } /* split *PATH at :'s, not spaces */ if (val-4 >= *e) { - if (!strncmp(val-4, "PATH", 4) || !strncmp(val-4, "Path", 4) || !strncmp(val-4, "path", 4)) split = SPLITPATH; + if (globhit(pathsplits, buf)) { + //fprintf(stderr, "PATH: [%s]\n", buf); + split = + (buf[0]=='l'||buf[0]=='L')&&(buf[1]=='u'||buf[1]=='U') ? ';' /* special for LUA_XXX */ + : SPLITPATH; + } } /* do the split */ for (pp = val+1; (p = strchr(pp, split)); pp = p+1) { int len = p-pp; // if (len >= sizeof(buf)) len = sizeof(buf)-1; + if (len < 1) continue; /* ignore empty elements */ strncpy(buf, pp, len); buf[len] = '\0'; + //fprintf(stderr, " [%s]\n", buf); l = list_new(l, buf, 0); } - l = list_new(l, pp, 0); /* get name */ strncpy(buf, *e, val-*e); buf[val-*e] = '\0'; + l = list_new(l, pp, 0); var_set(buf, l, VAR_SET); } } diff --git a/src/variable.h b/src/variable.h index 8692f07..0e84774 100644 --- a/src/variable.h +++ b/src/variable.h @@ -19,7 +19,7 @@ * * if variable name ends in PATH, split value at :'s, otherwise, split at blanks */ -extern void var_defines (const char **e); +extern void var_defines (const char **e, int dontignore); /* * var_string() - expand a string with variables in it -- 2.11.4.GIT