jamgram: cosmetix
[k8jam.git] / src / rexp.c
blob4eeb4279b4fafc5022dbec426c2361f37ee197b1
1 #include "jam.h"
2 #include "hash.h"
3 #include "newstr.h"
6 typedef struct recache_item_s {
7 regexp_t re; /* this MUST be here! */
8 struct recache_item_s *next; /* for 'same string, different flags' */
9 } recache_item_t;
/* regexp cache; starts out as a null pointer and is created on first use by find_re() */
static struct hash *recache;
15 /* return 0 if not found (created new ci) */
16 static int find_re (regexp_t **ci, const char *str, int flags) {
17 int r = 1;
18 recache_item_t fnd, *res = &fnd;
19 if (recache == NULL) recache = hashinit(sizeof(recache_item_t), "recache");
20 fnd.re.restr = newstr(str);
21 if (hashenter(recache, (HASHDATA **)&res)) {
22 /* want new one */
23 res->re.flags = flags;
24 res->re.maxmem = 0;
25 res->next = NULL;
26 r = 0;
27 //fprintf(stderr, "NEW RE: '%s'\n", str);
28 } else {
29 /* hit; check if we have regexp with this set of flags */
30 recache_item_t *c;
31 for (c = res; c != NULL; c = c->next) if (c->re.flags == flags) break;
32 if (c == NULL) {
33 /* not found; create new item */
34 recache_item_t *last;
35 for (last = res; last->next != NULL; last = last->next) ;
36 if ((c = malloc(sizeof(*c))) == NULL) { printf("FATAL: out of memory!\n"); exit(EXITBAD); }
37 c->re.restr = res->re.restr;
38 c->re.flags = res->re.flags;
39 c->re.maxmem = 0;
40 c->next = NULL;
41 last->next = c;
42 res = c;
43 r = 0;
44 //fprintf(stderr, "NEW RE(1): '%s'\n", str);
45 } else {
46 res = c;
47 //fprintf(stderr, "RE HIT: '%s'\n", str);
50 *ci = &res->re;
51 return r;
56 * regexp options:
57 * i: ignore case
58 * u: this is utf-8 string
59 * m: '.' matches newline
60 * default mode: non-utf-8 (it can be only reset with /.../u)
62 regexp_t *regexp_compile (const char *str, int flags) {
63 regexp_t *cre;
64 const char *s = str, *e = NULL, *errmsg;
65 flags |= RE9_FLAG_NONUTF8;
66 if (str == NULL) str = "";
67 if (str[0] == '/' && (e = strrchr(str+1, '/')) != NULL) {
68 /* this must be regexp with options */
69 for (const char *t = e+1; *t; ++t) {
70 switch (*t) {
71 case 'i': flags |= RE9_FLAG_CASEINSENS; break;
72 case 'u': flags &= ~RE9_FLAG_NONUTF8; break;
73 case 'm': flags |= RE9_FLAG_ANYDOT; break;
74 default:
75 printf("FATAL: invalid regexp option: '%c'!\n", *t);
76 exit(EXITBAD); /* oops */
79 ++str;
81 if (find_re(&cre, s, flags) == 0) {
82 if ((cre->re = re9_compile_ex(str, e, flags, &errmsg)) == NULL) {
83 printf("FATAL: regexp error: '%s'!\n", errmsg);
84 exit(EXITBAD); /* oops */
86 flags &= ~RE9_FLAG_ANYDOT; /* don't need that */
87 /* RE9_FLAG_CASEINSENS left only for caller checks; re9_execute() will ignore it */
88 cre->flags = flags;
90 return cre;
94 void regexp_free (regexp_t *re) {
95 /* do nothing, yeah! */
99 int regexp_execute (regexp_t *re, const char *bol, re9_sub_t *mp, int ms) {
100 if (re != NULL) {
101 #ifdef REGEXP9_DEBUG_MEMSIZE
102 int res = re9_execute(re->re, re->flags, bol, mp, ms);
103 if (re9_memused > re->maxmem) re->maxmem = re9_memused;
104 return res;
105 #else
106 return re9_execute(re->re, re->flags, bol, mp, ms);
107 #endif
109 return -1;
/*
 * Module shutdown hook.
 *
 * Does nothing unless REGEXP9_DEBUG_MEMSIZE is defined; in that case it
 * prints every cached regexp (including the ones chained behind a hash item
 * for "same string, different flags"), sorted by peak memory use
 * (largest first), then by pattern length, then by pattern text.
 */
void regexp_done (void) {
#ifdef REGEXP9_DEBUG_MEMSIZE
  int count = 0;
  regexp_t *rarray = NULL;

  /*
   * GCC nested functions, declared at function scope so they stay valid for
   * every call below; they share `count` and `rarray` with this function.
   */
  int count_items (const void *hdata, void *udata) {
    (void)udata;
    for (const recache_item_t *it = hdata; it != NULL; it = it->next) ++count;
    return 0;
  }

  int collect_items (const void *hdata, void *udata) {
    (void)udata;
    for (const recache_item_t *it = hdata; it != NULL; it = it->next) rarray[count++] = it->re;
    return 0;
  }

  int cmp (const void *p0, const void *p1) {
    const regexp_t *r0 = p0;
    const regexp_t *r1 = p1;
    /* compare instead of subtracting, so the result can neither overflow nor be truncated */
    if (r0->maxmem != r1->maxmem) return (r1->maxmem > r0->maxmem ? 1 : -1);
    size_t l0 = strlen(r0->restr), l1 = strlen(r1->restr);
    if (l0 != l1) return (l0 < l1 ? -1 : 1);
    return strcmp(r0->restr, r1->restr);
  }

  /* the cache is created lazily, so it may not exist at all */
  if (recache != NULL) hashiterate(recache, count_items, NULL);
  if (count > 0) {
    rarray = malloc(sizeof(rarray[0])*count);
    if (rarray == NULL) {
      printf("regexp_done: out of memory, memory report skipped\n");
      return;
    }
    count = 0;
    hashiterate(recache, collect_items, NULL);
  }
  printf("regexps, sorted by used memory:\n");
  if (count > 0) qsort(rarray, count, sizeof(rarray[0]), cmp);
  for (int f = 0; f < count; ++f) printf("%10d: /%s/\n", rarray[f].maxmem, rarray[f].restr);
  free(rarray);
#endif
}