From fcd87e3d9a04820e0bcebfa577c83f4c46287d75 Mon Sep 17 00:00:00 2001 From: skimo Date: Sun, 16 Jul 2000 15:27:45 +0000 Subject: [PATCH] CHAR_T moves to regex --- regex/cclass.h | 39 +++++++++++++------- regex/engine.c | 110 ++++++++++++++++++++++++++++---------------------------- regex/regcomp.c | 40 +++++++++++---------- regex/regex2.h | 4 +-- regex/regexec.c | 4 +-- 5 files changed, 106 insertions(+), 91 deletions(-) diff --git a/regex/cclass.h b/regex/cclass.h index 9a806962..2f5ab1c6 100644 --- a/regex/cclass.h +++ b/regex/cclass.h @@ -37,34 +37,47 @@ * @(#)cclass.h 8.2 (Berkeley) 3/16/94 */ +CHAR_T ALNUM[] = {'a','l','n','u','m',0}; +CHAR_T ALPHA[] = {'a','l','p','h','a',0}; +CHAR_T BLANK[] = {'b','l','a','n','k',0}; +CHAR_T CNTRL[] = {'c','n','t','r','l',0}; +CHAR_T DIGIT[] = {'d','i','g','i','t',0}; +CHAR_T GRAPH[] = {'g','r','a','p','h',0}; +CHAR_T LOWER[] = {'l','o','w','e','r',0}; +CHAR_T PRINT[] = {'p','r','i','n','t',0}; +CHAR_T PUNCT[] = {'p','u','n','c','t',0}; +CHAR_T SPACE[] = {'s','p','a','c','e',0}; +CHAR_T UPPER[] = {'u','p','p','e','r',0}; +CHAR_T XDIGIT[] = {'x','d','i','g','i','t',0}; + /* character-class table */ static struct cclass { - char *name; + CHAR_T *name; char *chars; char *multis; } cclasses[] = { - "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + ALNUM, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789", "", - "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "", - "blank", " \t", "", - "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ + BLANK, " \t", "", + CNTRL, "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ \25\26\27\30\31\32\33\34\35\36\37\177", "", - "digit", "0123456789", "", - "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + DIGIT, "0123456789", "", + GRAPH, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "", - "lower", 
"abcdefghijklmnopqrstuvwxyz", + LOWER, "abcdefghijklmnopqrstuvwxyz", "", - "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + PRINT, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "", - "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + PUNCT, "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "", - "space", "\t\n\v\f\r ", "", - "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + SPACE, "\t\n\v\f\r ", "", + UPPER, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", - "xdigit", "0123456789ABCDEFabcdef", + XDIGIT, "0123456789ABCDEFabcdef", "", NULL, 0, "" }; diff --git a/regex/engine.c b/regex/engine.c index 17f34a41..980546ae 100644 --- a/regex/engine.c +++ b/regex/engine.c @@ -72,11 +72,11 @@ struct match { struct re_guts *g; int eflags; regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - char *offp; /* offsets work from here */ - char *beginp; /* start of string -- virtual NUL precedes */ - char *endp; /* end of string -- virtual NUL here */ - char *coldp; /* can be no match starting before here */ - char **lastpos; /* [nplus+1] */ + CHAR_T *offp; /* offsets work from here */ + CHAR_T *beginp; /* start of string -- virtual NUL precedes */ + CHAR_T *endp; /* end of string -- virtual NUL here */ + CHAR_T *coldp; /* can be no match starting before here */ + CHAR_T **lastpos; /* [nplus+1] */ STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ @@ -90,11 +90,11 @@ extern "C" { #endif /* === engine.c === */ -static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags)); -static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); -static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev)); -static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); -static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno 
stopst)); +static int matcher __P((struct re_guts *g, CHAR_T *string, size_t nmatch, regmatch_t pmatch[], int eflags)); +static CHAR_T *dissect __P((struct match *m, CHAR_T *start, CHAR_T *stop, sopno startst, sopno stopst)); +static CHAR_T *backref __P((struct match *m, CHAR_T *start, CHAR_T *stop, sopno startst, sopno stopst, sopno lev)); +static CHAR_T *fast __P((struct match *m, CHAR_T *start, CHAR_T *stop, sopno startst, sopno stopst)); +static CHAR_T *slow __P((struct match *m, CHAR_T *start, CHAR_T *stop, sopno startst, sopno stopst)); static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft)); #define BOL (OUT+1) #define EOL (BOL+1) @@ -132,26 +132,26 @@ static char *pchar __P((int ch)); /* - matcher - the actual matching engine - == static int matcher(register struct re_guts *g, char *string, \ + == static int matcher(register struct re_guts *g, CHAR_T *string, \ == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ matcher(g, string, nmatch, pmatch, eflags) register struct re_guts *g; -char *string; +CHAR_T *string; size_t nmatch; regmatch_t pmatch[]; int eflags; { - register char *endp; + register CHAR_T *endp; register int i; struct match mv; register struct match *m = &mv; - register char *dp; + register CHAR_T *dp; const register sopno gf = g->firststate+1; /* +1 for OEND */ const register sopno gl = g->laststate; - char *start; - char *stop; + CHAR_T *start; + CHAR_T *stop; /* simplify the situation where possible */ if (g->cflags&REG_NOSUB) @@ -161,7 +161,7 @@ int eflags; stop = string + pmatch[0].rm_eo; } else { start = string; - stop = start + strlen(start); + stop = start + v_strlen(start); } if (stop < start) return(REG_INVARG); @@ -170,7 +170,7 @@ int eflags; if (g->must != NULL) { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && stop - dp >= g->mlen && - memcmp(dp, g->must, (size_t)g->mlen) == 0) + MEMCMPW(dp, g->must, (size_t)g->mlen) == 0) 
break; if (dp == stop) /* we didn't find g->must */ return(REG_NOMATCH); @@ -229,8 +229,8 @@ int eflags; dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **)malloc((g->nplus+1) * - sizeof(char *)); + m->lastpos = (CHAR_T **)malloc((g->nplus+1) * + sizeof(CHAR_T *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); STATETEARDOWN(m); @@ -298,30 +298,30 @@ int eflags; /* - dissect - figure out what matched what, no back references - == static char *dissect(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + == static CHAR_T *dissect(register struct match *m, CHAR_T *start, \ + == CHAR_T *stop, sopno startst, sopno stopst); */ -static char * /* == stop (success) always */ +static CHAR_T * /* == stop (success) always */ dissect(m, start, stop, startst, stopst) register struct match *m; -char *start; -char *stop; +CHAR_T *start; +CHAR_T *stop; sopno startst; sopno stopst; { register int i; register sopno ss; /* start sop of current subRE */ register sopno es; /* end sop of current subRE */ - register char *sp; /* start of string matched by it */ - register char *stp; /* string matched by it cannot pass here */ - register char *rest; /* start of rest of string */ - register char *tail; /* string unmatched by rest of RE */ + register CHAR_T *sp; /* start of string matched by it */ + register CHAR_T *stp; /* string matched by it cannot pass here */ + register CHAR_T *rest; /* start of rest of string */ + register CHAR_T *tail; /* string unmatched by rest of RE */ register sopno ssub; /* start sop of subsubRE */ register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *sep; /* end of string matched by subsubRE */ - register char *oldssp; /* previous ssp */ - register char *dp; + register CHAR_T *ssp; /* start of string matched by subsubRE */ + register CHAR_T *sep; /* end of string matched by 
subsubRE */ + register CHAR_T *oldssp; /* previous ssp */ + register CHAR_T *dp; AT("diss", start, stop, startst, stopst); sp = start; @@ -486,25 +486,25 @@ sopno stopst; /* - backref - figure out what matched what, figuring in back references - == static char *backref(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst, sopno lev); + == static CHAR_T *backref(register struct match *m, CHAR_T *start, \ + == CHAR_T *stop, sopno startst, sopno stopst, sopno lev); */ -static char * /* == stop (success) or NULL (failure) */ +static CHAR_T * /* == stop (success) or NULL (failure) */ backref(m, start, stop, startst, stopst, lev) register struct match *m; -char *start; -char *stop; +CHAR_T *start; +CHAR_T *stop; sopno startst; sopno stopst; sopno lev; /* PLUS nesting level */ { register int i; register sopno ss; /* start sop of current subRE */ - register char *sp; /* start of string matched by it */ + register CHAR_T *sp; /* start of string matched by it */ register sopno ssub; /* start sop of subsubRE */ register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *dp; + register CHAR_T *ssp; /* start of string matched by subsubRE */ + register CHAR_T *dp; register size_t len; register int hard; register sop s; @@ -690,26 +690,26 @@ sopno lev; /* PLUS nesting level */ /* - fast - step through the string at top speed - == static char *fast(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + == static CHAR_T *fast(register struct match *m, CHAR_T *start, \ + == CHAR_T *stop, sopno startst, sopno stopst); */ -static char * /* where tentative match ended, or NULL */ +static CHAR_T * /* where tentative match ended, or NULL */ fast(m, start, stop, startst, stopst) register struct match *m; -char *start; -char *stop; +CHAR_T *start; +CHAR_T *stop; sopno startst; sopno stopst; { register states st = m->st; register states fresh = m->fresh; 
register states tmp = m->tmp; - register char *p = start; + register CHAR_T *p = start; register int c = (start == m->beginp) ? OUT : *(start-1); register int lastc; /* previous c */ register int flagch; register int i; - register char *coldp; /* last p after which no match was underway */ + register CHAR_T *coldp; /* last p after which no match was underway */ CLEAR(st); SET1(st, startst); @@ -781,26 +781,26 @@ sopno stopst; /* - slow - step through the string more deliberately - == static char *slow(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + == static CHAR_T *slow(register struct match *m, CHAR_T *start, \ + == CHAR_T *stop, sopno startst, sopno stopst); */ -static char * /* where it ended */ +static CHAR_T * /* where it ended */ slow(m, start, stop, startst, stopst) register struct match *m; -char *start; -char *stop; +CHAR_T *start; +CHAR_T *stop; sopno startst; sopno stopst; { register states st = m->st; register states empty = m->empty; register states tmp = m->tmp; - register char *p = start; + register CHAR_T *p = start; register int c = (start == m->beginp) ? OUT : *(start-1); register int lastc; /* previous c */ register int flagch; register int i; - register char *matchp; /* last p at which a match ended */ + register CHAR_T *matchp; /* last p at which a match ended */ AT("slow", start, stop, startst, stopst); CLEAR(st); diff --git a/regex/regcomp.c b/regex/regcomp.c index 75425f5e..cf163fe8 100644 --- a/regex/regcomp.c +++ b/regex/regcomp.c @@ -60,8 +60,8 @@ static char sccsid[] = "@(#)regcomp.c 8.4 (Berkeley) 3/19/94"; * other clumsinesses */ struct parse { - char *next; /* next character in RE */ - char *end; /* end of string (-> NUL normally) */ + CHAR_T *next; /* next character in RE */ + CHAR_T *end; /* end of string (-> NUL normally) */ int error; /* has an error been seen? 
*/ sop *strip; /* malloced strip */ sopno ssize; /* malloced strip size (allocated) */ @@ -125,7 +125,7 @@ static sopno pluscount __P((struct parse *p, struct re_guts *g)); #endif /* ========= end header generated by ./mkh ========= */ -static char nuls[10]; /* place to point scanner in event of error */ +static CHAR_T nuls[10]; /* place to point scanner in event of error */ /* * macros for use with parse structure @@ -165,7 +165,7 @@ static int never = 0; /* for use in asserts; shuts lint up */ /* - regcomp - interface for parser and compilation - = extern int regcomp(regex_t *, const char *, int); + = extern int regcomp(regex_t *, const CHAR_T *, int); = #define REG_BASIC 0000 = #define REG_EXTENDED 0001 = #define REG_ICASE 0002 @@ -201,7 +201,7 @@ int cflags; return(REG_INVARG); len = preg->re_endp - pattern; } else - len = strlen((char *)pattern); + len = v_strlen(pattern); /* do the mallocs early so failure handling is easy */ g = (struct re_guts *)malloc(sizeof(struct re_guts) + @@ -218,7 +218,7 @@ int cflags; /* set things up */ p->g = g; - p->next = (char *)pattern; /* convenience; we do not modify it */ + p->next = (CHAR_T *)pattern; /* convenience; we do not modify it */ p->end = p->next + len; p->error = 0; p->ncsalloc = 0; @@ -676,14 +676,16 @@ register struct parse *p; register char c; register cset *cs = allocset(p); register int invert = 0; + static CHAR_T bow[] = { '[', ':', '<', ':', ']', ']' }; + static CHAR_T eow[] = { '[', ':', '>', ':', ']', ']' }; /* Dept of Truly Sickening Special-Case Kludges */ - if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + if (p->next + 5 < p->end && MEMCMPW(p->next, bow, 6) == 0) { EMIT(OBOW, 0); NEXTn(6); return; } - if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + if (p->next + 5 < p->end && MEMCMPW(p->next, eow, 6) == 0) { EMIT(OEOW, 0); NEXTn(6); return; @@ -815,7 +817,7 @@ p_b_cclass(p, cs) register struct parse *p; register cset *cs; { - register char *sp = p->next; + 
register CHAR_T *sp = p->next; register struct cclass *cp; register size_t len; register char *u; @@ -825,7 +827,7 @@ register cset *cs; NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) - if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + if (v_strlen(cp->name) == len && MEMCMPW(cp->name, sp, len) == 0) break; if (cp->name == NULL) { /* oops, didn't find it */ @@ -886,7 +888,7 @@ p_b_coll_elem(p, endc) register struct parse *p; int endc; /* name ended by endc,']' */ { - register char *sp = p->next; + register CHAR_T *sp = p->next; register struct cname *cp; register int len; register char c; @@ -899,7 +901,7 @@ int endc; /* name ended by endc,']' */ } len = p->next - sp; for (cp = cnames; cp->name != NULL; cp++) - if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') + if (v_strlen(cp->name) == len && MEMCMPW(cp->name, sp, len) == 0) return(cp->code); /* known name */ if (len == 1) return(*sp); /* single character */ @@ -935,9 +937,9 @@ bothcases(p, ch) register struct parse *p; int ch; { - register char *oldnext = p->next; - register char *oldend = p->end; - char bracket[3]; + register CHAR_T *oldnext = p->next; + register CHAR_T *oldend = p->end; + CHAR_T bracket[3]; assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; @@ -981,9 +983,9 @@ static void nonnewline(p) register struct parse *p; { - register char *oldnext = p->next; - register char *oldend = p->end; - char bracket[4]; + register CHAR_T *oldnext = p->next; + register CHAR_T *oldend = p->end; + CHAR_T bracket[4]; p->next = bracket; p->end = bracket+3; @@ -1594,7 +1596,7 @@ register struct re_guts *g; register sop *newstart; register sopno newlen; register sop s; - register char *cp; + register CHAR_T *cp; register sopno i; /* avoid making error situations worse */ diff --git a/regex/regex2.h b/regex/regex2.h index 4736641f..0b8706f6 100644 --- a/regex/regex2.h +++ b/regex/regex2.h @@ -134,7 +134,7 @@ typedef struct { #define MCin(p, cs, cp) 
mcin(p, cs, cp) /* stuff for character categories */ -typedef unsigned char cat_t; +typedef CHAR_T cat_t; /* * main compiled-expression structure @@ -159,7 +159,7 @@ struct re_guts { int neol; /* number of $ used */ int ncategories; /* how many character categories */ cat_t *categories; /* ->catspace[-CHAR_MIN] */ - char *must; /* match must contain this string */ + CHAR_T *must; /* match must contain this string */ int mlen; /* length of must */ size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ diff --git a/regex/regexec.c b/regex/regexec.c index 8d183b6b..24c83992 100644 --- a/regex/regexec.c +++ b/regex/regexec.c @@ -175,7 +175,7 @@ int eflags; eflags = GOODFLAGS(eflags); if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE)) - return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); + return(smatcher(g, (CHAR_T *)string, nmatch, pmatch, eflags)); else - return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); + return(lmatcher(g, (CHAR_T *)string, nmatch, pmatch, eflags)); } -- 2.11.4.GIT