added "jammod" command and "genman" module
[k8jam.git] / src / rexp.c
blobc5279c4650a6907ffb127b0e8acf60b483d4995c
1 /* coded by Ketmar // Vampire Avalon (psyc://ketmar.no-ip.org/~Ketmar)
2 * Understanding is not required. Only obedience.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #include "jam.h"
18 #include "hash.h"
19 #include "newstr.h"
22 typedef struct recache_item_s {
23 regexp_t re; /* this MUST be here! */
24 struct recache_item_s *next; /* for 'same string, different flags' */
25 } recache_item_t;
28 static struct hash *recache = NULL;
31 /* return 0 if not found (created new ci) */
32 static int find_re (regexp_t **ci, const char *str, int flags) {
33 int r = 1;
34 recache_item_t fnd, *res = &fnd;
35 if (recache == NULL) recache = hashinit(sizeof(recache_item_t), "recache");
36 fnd.re.restr = newstr(str);
37 if (hashenter(recache, (HASHDATA **)&res)) {
38 /* want new one */
39 res->re.flags = flags;
40 res->re.maxmem = 0;
41 res->next = NULL;
42 r = 0;
43 //fprintf(stderr, "NEW RE: '%s'\n", str);
44 } else {
45 /* hit; check if we have regexp with this set of flags */
46 recache_item_t *c;
47 for (c = res; c != NULL; c = c->next) if (c->re.flags == flags) break;
48 if (c == NULL) {
49 /* not found; create new item */
50 recache_item_t *last;
51 for (last = res; last->next != NULL; last = last->next) ;
52 if ((c = malloc(sizeof(*c))) == NULL) { printf("FATAL: out of memory!\n"); exit(EXITBAD); }
53 c->re.restr = res->re.restr;
54 c->re.flags = res->re.flags;
55 c->re.maxmem = 0;
56 c->next = NULL;
57 last->next = c;
58 res = c;
59 r = 0;
60 //fprintf(stderr, "NEW RE(1): '%s'\n", str);
61 } else {
62 res = c;
63 //fprintf(stderr, "RE HIT: '%s'\n", str);
66 *ci = &res->re;
67 return r;
72 * regexp options:
73 * i: ignore case
74 * u: this is utf-8 string
75 * m: '.' matches newline
76 * default mode: non-utf-8 (it can be only reset with /.../u)
78 regexp_t *regexp_compile (const char *str, int flags) {
79 regexp_t *cre;
80 const char *s = str, *e = NULL, *errmsg;
81 flags |= RE9_FLAG_NONUTF8;
82 if (str == NULL) str = "";
83 if (str[0] == '/' && (e = strrchr(str+1, '/')) != NULL) {
84 /* this must be regexp with options */
85 for (const char *t = e+1; *t; ++t) {
86 switch (*t) {
87 case 'i': flags |= RE9_FLAG_CASEINSENS; break;
88 case 'u': flags &= ~RE9_FLAG_NONUTF8; break;
89 case 'm': flags |= RE9_FLAG_ANYDOT; break;
90 default:
91 printf("FATAL: invalid regexp option: '%c'!\n", *t);
92 exit(EXITBAD); /* oops */
95 ++str;
97 if (find_re(&cre, s, flags) == 0) {
98 if ((cre->re = re9_compile_ex(str, e, flags, &errmsg)) == NULL) {
99 printf("FATAL: regexp error: '%s'!\n", errmsg);
100 exit(EXITBAD); /* oops */
102 flags &= ~RE9_FLAG_ANYDOT; /* don't need that */
103 /* RE9_FLAG_CASEINSENS left only for caller checks; re9_execute() will ignore it */
104 cre->flags = flags;
106 return cre;
110 void regexp_free (regexp_t *re) {
111 /* do nothing, yeah! */
115 int regexp_execute (regexp_t *re, const char *bol, re9_sub_t *mp, int ms) {
116 if (re != NULL) {
117 #ifdef REGEXP9_DEBUG_MEMSIZE
118 int res = re9_execute(re->re, re->flags, bol, mp, ms);
119 if (re9_memused > re->maxmem) re->maxmem = re9_memused;
120 return res;
121 #else
122 return re9_execute(re->re, re->flags, bol, mp, ms);
123 #endif
125 return -1;
/*
 * Shutdown hook of the regexp cache.
 *
 * Without REGEXP9_DEBUG_MEMSIZE this does nothing. With it, prints every
 * cached regexp (including the chained "same string, different flags" items)
 * together with its recorded maxmem, ordered by: maxmem descending, then
 * source length ascending, then strcmp() of the source.
 *
 * Terminates the program with EXITBAD when the temporary array cannot be
 * allocated.
 */
void regexp_done (void) {
#ifdef REGEXP9_DEBUG_MEMSIZE
  int count = 0;
  regexp_t *rarray = NULL;

  /* GCC nested functions: they work on `count` and `rarray` of this call.
   * A zero return is what the original callbacks returned to hashiterate(). */
  int count_items (const void *hdata, void *udata) {
    for (const recache_item_t *it = hdata; it != NULL; it = it->next) ++count;
    return 0;
  }
  int collect_items (const void *hdata, void *udata) {
    for (const recache_item_t *it = hdata; it != NULL; it = it->next) rarray[count++] = it->re;
    return 0;
  }
  int cmp (const void *p0, const void *p1) {
    const regexp_t *r0 = p0;
    const regexp_t *r1 = p1;
    if (r0->maxmem != r1->maxmem) return r1->maxmem-r0->maxmem;
    size_t len0 = strlen(r0->restr), len1 = strlen(r1->restr);
    if (len0 != len1) return (len0 < len1 ? -1 : 1);
    return strcmp(r0->restr, r1->restr);
  }

  /* the cache does not exist until the first regexp is compiled */
  if (recache != NULL) hashiterate(recache, count_items, NULL);
  if (count > 0) {
    if ((rarray = malloc(sizeof(rarray[0])*count)) == NULL) { printf("FATAL: out of memory!\n"); exit(EXITBAD); }
    count = 0; /* reused as the fill index by collect_items() */
    hashiterate(recache, collect_items, NULL);
  }
  printf("regexps, sorted by used memory:\n");
  if (count > 1) qsort(rarray, count, sizeof(rarray[0]), cmp);
  for (int f = 0; f < count; ++f) printf("%10d: /%s/\n", rarray[f].maxmem, rarray[f].restr);
  free(rarray);
#endif
}