1 /******************************************************************************
2 Regular Expression compiler
4 some kind of state machine is definitelly used. A parser too!
5 ******************************************************************************/
10 static bool regexp_case
;
11 static int regexp_partition
= 255;
12 static bool regexp_packed
;
13 static int regexp_INFTY
= 32000;
14 static bool regexp_noextract
;
15 static bool regexp_noctbl
= 1;
16 static char* regexp_recipe
;
17 static bool regexp_anon
;
18 static bool regexp_strfuncs
= 0;
22 void regexp_print (int *retons
)
26 for (i
= 0; retons
[i
]; i
++)
28 PRINTF (" [%i] ", retons
[i
]);
29 else if (retons
[i
] > 31)
30 PRINTF (" %c ", retons
[i
]);
32 PRINTF (" -%i ", retons
[i
]);
38 void rerror (char *s
) {
39 fprintf (stderr
, "regexp error: %s\n", s
);
40 fprintf (stderr
, "regexp error: In \"%s\"\n", regexp_recipe
);
46 RE_SPECIAL_SSTART
= 256, // ^
48 RE_SPECIAL_OPARENTH
, // (
49 RE_SPECIAL_CPARENTH
, // )
50 RE_SPECIAL_OBRAK
, // [
51 RE_SPECIAL_ONBRAK
, // [^
52 RE_SPECIAL_RANGE
, // -
53 RE_SPECIAL_CBRAK
, // ]
55 RE_SPECIAL_QUESTION
, // ?
59 RE_SPECIAL_OBRACE
, // {
60 RE_SPECIAL_CBRACE
, // }
61 RE_SPECIAL_GRANTED
, // ?/
63 RE_SPECIAL_UCLASS
, // \c
65 RE_SPECIAL_STORP
= 512,
66 RE_SPECIAL_CLASS
= 2048
69 static inline int isstorp (int i
)
70 { return i
>= RE_SPECIAL_STORP
&& i
< RE_SPECIAL_CLASS
; }
71 static inline int isuclass (int i
)
72 { return i
>= RE_SPECIAL_UCLASS
&& i
< RE_SPECIAL_STORP
; }
79 } uclass
[OFFS ('z')] = {
80 [OFFS ('w')] = { "[a-zA-Z0-9_]", 0 },
81 [OFFS ('s')] = { "[ \t\r\n\f]", 0 },
82 [OFFS ('d')] = { "[0-9]", 0 }
89 static int escape (char *r
, int i
, regstr R
)
91 #define EPARSE(x, y) break; case x: *R = y; return i;
92 #define ESCAPE(x) EPARSE (x, x)
96 char *endptr
= endptr
;
97 if (r
[i
] == 'x' && isxdigit (r
[i
+1]))
98 *R
= strtol (r
+ i
+ 1, &endptr
, 16);
99 else if (r
[i
] == '0')
100 *R
= strtol (r
+ i
, &endptr
, 8);
101 else if (r
[i
] >= '1' && r
[i
] <= '9')
102 rerror ("Backreferences *not* supported");
105 *R
= RE_SPECIAL_UCLASS
+ r
[i
];
106 else rerror ("undefined escape sequence");
109 return endptr
- r
- 1;
111 EPARSE ('n', '\n'); EPARSE ('t', '\t'); EPARSE ('f', '\f');
112 EPARSE ('r', '\r'); EPARSE ('a', '\a');
113 ESCAPE ('('); ESCAPE (')'); ESCAPE (']'); ESCAPE ('[');
114 ESCAPE ('{'); ESCAPE ('}'); ESCAPE ('.'); ESCAPE ('?');
115 ESCAPE ('*'); ESCAPE ('+'); ESCAPE ('-'); ESCAPE ('^');
116 ESCAPE ('$'); ESCAPE ('|'); ESCAPE ('\\');
122 static void regexp_descape (char *r
, regstr R
)
124 #define XPARSE(x,y) break; case x: R [j++] = y
126 for (np
= i
= j
= 0; r
[i
]; i
++)
128 default: R
[j
++] = r
[i
];
129 XPARSE ('^', RE_SPECIAL_SSTART
);
130 XPARSE ('$', RE_SPECIAL_SEND
);
131 XPARSE ('.', RE_SPECIAL_DOT
);
132 XPARSE ('?', RE_SPECIAL_QUESTION
);
133 XPARSE ('*', RE_SPECIAL_STAR
);
134 XPARSE ('+', RE_SPECIAL_PLUS
);
135 XPARSE ('|', RE_SPECIAL_OR
);
136 ncase
')': R
[j
++] = np
? (np
--, RE_SPECIAL_CPARENTH
) : ')';
137 XPARSE ('(', RE_SPECIAL_OPARENTH
);
139 b
= regexp_noextract
;
140 if (r
[i
+ 1] == '?')
141 switch (r
[i
+= 2]) {
142 default: rerror ("embedded modifier not supported");
143 ncase
'/': R
[j
++] = RE_SPECIAL_GRANTED
;
146 if (!b
) R
[j
++] = RE_SPECIAL_STORP
+ nextr
++;
148 if (r
[++i
] == '^') {
149 R
[j
++] = RE_SPECIAL_ONBRAK
;
151 } else R
[j
++] = RE_SPECIAL_OBRAK
;
154 for (; r
[i
]; i
++) switch (r
[i
]) {
155 default: R
[j
++] = r
[i
];
156 ncase
']': R
[j
++] = RE_SPECIAL_CBRAK
; goto close1
;
157 case '-': R
[j
++] = RE_SPECIAL_RANGE
;
158 ncase
'\\': i
= escape (r
, i
+ 1, &R
[j
++]);
161 if (R
[j
- 2] == RE_SPECIAL_RANGE
)
164 char *endptr
= endptr
;
165 R
[j
++] = RE_SPECIAL_OBRACE
;
167 if (r
[i
] < '0' || r
[i
] > '9')
168 rerror ("no number after '{'");
169 R
[j
++] = strtol (r
+ i
, &endptr
, 10);
170 if (r
[i
= endptr
- r
] == ',') {
173 if (r
[i
] >= '0' && r
[i
] <= '9') {
174 R
[j
++] = strtol (r
+ i
, &endptr
, 10);
178 if (r
[i
] != '}') rerror ("Unclosed '{'");
179 R
[j
++] = RE_SPECIAL_CBRACE
;
181 ncase
'\\': i
= escape (r
, i
+ 1, &R
[j
++]);
184 if (np
) rerror ("unclosed parenthesis");
187 typedef char *char_class
;
189 static char_class
*CClass
;
192 static int add_class (char_class C
)
195 for (i
= 0; i
< nclasses
; i
++)
196 if (!strcmp (CClass
[i
], C
))
197 return i
+ RE_SPECIAL_CLASS
;
198 CClass
[nclasses
] = strdup (C
);
199 return RE_SPECIAL_CLASS
+ nclasses
++;
202 static inline void OR_classes (char_class C1
, char_class C2
)
205 for (i
= 0; i
< 258; i
++)
206 if (C1
[i
] == '0' && C2
[i
] == '1')
210 static int mk_class (regstr
, int*);
212 static int mk_uclass (int v
)
214 int cb
= tolower (v
- RE_SPECIAL_UCLASS
);
215 int iv
= cb
!= v
- RE_SPECIAL_UCLASS
, i
;
218 if (!uclass
[OFFS (cb
)].def
)
219 rerror ("undefined user class");
221 if (uclass
[OFFS (cb
)].op
)
222 rerror ("recusrive character class");
223 uclass
[OFFS (cb
)].op
= 1;
224 regexp_descape (uclass
[OFFS (cb
)].def
, R
);
226 v
-= RE_SPECIAL_CLASS
;
228 for (i
= 1; i
< 258; i
++)
229 CClass
[v
][i
] = CClass
[v
][i
] == '1' ? '0' : '1';
230 uclass
[OFFS (cb
)].op
= 0;
231 return v
+ RE_SPECIAL_CLASS
;
234 static int mk_class (regstr R
, int *p
)
236 int i
= 0, j1
, j2
, c
;
237 int not = R
[i
++] == RE_SPECIAL_ONBRAK
;
238 char base_class
[258];
240 if (!not && R
[i
+ 1] == RE_SPECIAL_CBRAK
&& R
[i
] < 256) {
245 memset (base_class
, '0', 258);
246 base_class
[258] = 0;
248 while (R
[i
] != RE_SPECIAL_CBRAK
)
250 if (R
[i
+ 1] != RE_SPECIAL_RANGE
) {
251 base_class
[R
[i
++]] = '1';
254 if (R
[i
+ 2] >= 256) rerror ("range to special");
255 if (R
[i
+ 2] < R
[i
])
256 j1
= R
[i
+ 2], j2
= R
[i
];
257 else j1
= R
[i
], j2
= R
[i
+ 2];
259 base_class
[j1
++] = '1';
261 } else if (isuclass (R
[i
])) {
262 j1
= mk_uclass (R
[i
++]) - RE_SPECIAL_CLASS
;
263 for (c
= 0; c
< 256; c
++)
264 if (CClass
[j1
][c
] == '1')
265 base_class
[c
] = '1';
266 } else rerror ("was not expecting that in here");
269 for (j1
= 1; j1
< 256; j1
++)
270 base_class
[j1
] = base_class
[j1
] == '0' ? '1' : '0';
272 *p
= add_class (base_class
);
276 static void build_ctbl ();
277 static void compile_classes (regstr R
)
280 char_class tmp
[256];
283 for (i
= 0; R
[i
]; i
++)
287 for (i
= j
= 0; R2
[i
]; i
++)
288 if (R2
[i
] == RE_SPECIAL_OBRAK
|| R2
[i
] == RE_SPECIAL_ONBRAK
)
289 i
+= mk_class (&R2
[i
], &R
[j
++]);
290 else if (isuclass (R2
[i
]))
291 R
[j
++] = mk_uclass (R2
[i
]);
292 else R
[j
++] = R2
[i
];
298 static unsigned int *ctbl
;
300 static void build_ctbl ()
305 if (nclasses
> 31) rerror ("not supported > 32 character classes");
307 for (i
= 0; i
< 258; i
++)
310 for (i
= 0; i
< nclasses
; i
++)
311 for (C
= CClass
[i
], v
= 1 << i
, j
= 0; j
< 257; j
++)
316 static int count1s (int b
)
320 for (i
= 1, s
= 0; i
< 256; i
++)
321 if (ctbl
[i
] & b
) ++s
;
324 //. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
325 static inline int re_skip_parenthesis (regstr R
, int i
)
331 case RE_SPECIAL_OPARENTH
: ++p
;
332 ncase RE_SPECIAL_CPARENTH
: --p
;
333 ncase
0: rerror ("Unclosed parenthesis");
338 static int find_or (regstr R
, int o
[])
342 for (i
= n
= 0; R
[i
]; i
++)
343 if (R
[i
] == RE_SPECIAL_OR
)
345 else if (R
[i
] == RE_SPECIAL_OPARENTH
)
346 i
= re_skip_parenthesis (R
, i
+ 1) - 1;
352 static int find_and (regstr R
, int o
[])
359 i
= R
[i
] == RE_SPECIAL_OPARENTH
? re_skip_parenthesis (R
, i
+ 1) : i
+ 1;
360 if (R
[i
] != RE_SPECIAL_STAR
&& R
[i
] != RE_SPECIAL_PLUS
361 && R
[i
] != RE_SPECIAL_QUESTION
&& R
[i
] != RE_SPECIAL_OBRACE
)
363 if (R
[i
] == RE_SPECIAL_OBRACE
)
364 while (R
[i
] != RE_SPECIAL_CBRACE
) i
++;
366 if (R
[i
] == RE_SPECIAL_QUESTION
) i
++;
373 // * * * * * * * * * * A.n.a.l.y.s.i.s * * * * * * * * * * * * *
374 // generate and optimize the ``subregular'' array which
375 // is more convenient. We can do analysis and put all
376 // the info for the code generator in the sr_pool[]
378 enum { SB_NIL
, SB_TERM
, SB_OR
, SB_AND
, SB_REPG
, SB_REPN
};
383 subReg
*expr
, expr1
, referrer
, this;
395 int save_fail
, use_fail
;
397 // for the code generator
398 int proton
, save_success
, gst
, usemin
;
401 static subregular
**sr_pool
;
405 static void free_subregs ()
408 for (i
= 0; i
< nsr_pool
; i
++) {
409 if (sr_pool
[i
]->swtbl
) free (sr_pool
[i
]->swtbl
);
410 if (sr_pool
[i
]->strn
) free (sr_pool
[i
]->strn
);
415 static void set_repet (subregular
*m
, regstr R
)
419 case RE_SPECIAL_STAR
: m
->min
= 0; m
->max
= regexp_INFTY
;
420 ncase RE_SPECIAL_PLUS
: m
->min
= 1; m
->max
= regexp_INFTY
;
421 ncase RE_SPECIAL_QUESTION
: m
->min
= 0; m
->max
= 1;
422 ncase RE_SPECIAL_OBRACE
: m
->min
= R
[j
++];
424 if (R
[j
+ 1] != RE_SPECIAL_CBRACE
) {
428 m
->max
= regexp_INFTY
;
436 m
->type
= R
[j
] == RE_SPECIAL_QUESTION
? SB_REPN
: SB_REPG
;
439 static subregular
*mallocsubr (int type
, int store_at
, int from
, int to
, subReg referrer
)
441 subregular
*m
= (subregular
*) malloc (sizeof *m
);
444 m
->store_at
= store_at
;
448 m
->referrer
= referrer
;
452 m
->save_fail
= m
->use_fail
= 0;
458 return sr_pool
[m
->this = nsr_pool
++] = m
;
461 static subReg
make_subregulars (regstr R
, subReg referrer
)
463 int n
, ors
[128], tmp
[256], i
, j
, k
, orst
[64];
467 return mallocsubr (SB_NIL
, -1, 0, 0, referrer
)->this;
469 if (n
= find_or (R
, ors
)) {
471 from
= regexp_INFTY
, to
= 0;
472 for (i
= 0; i
< n
; i
++) {
473 for (j
= 0, k
= i
? ors
[i
-1]+1 : 0; k
< ors
[i
];)
476 j
= orst
[i
] = make_subregulars (tmp
, -1);
477 if (sr_pool
[j
]->from
< from
) from
= sr_pool
[j
]->from
;
478 if (sr_pool
[j
]->to
> to
) to
= sr_pool
[j
]->to
;
481 m
= mallocsubr (SB_OR
, -1, from
, to
, referrer
);
482 for (i
= 0; i
< n
; i
++)
483 sr_pool
[orst
[i
]]->referrer
= m
->this;
485 m
->expr
= intdup (orst
);
489 n
= find_and (R
, ors
);
493 for (from
= to
= i
= 0; i
< n
; i
++) {
494 for (j
= 0, k
= ors
[i
]; k
< ors
[i
+1];)
497 j
= orst
[i
] = make_subregulars (tmp
, -1);
498 from
+= sr_pool
[j
]->from
;
499 to
+= sr_pool
[j
]->to
;
502 m
= mallocsubr (SB_AND
, -1, from
, min (regexp_INFTY
, to
), referrer
);
503 for (i
= 0; i
< n
; i
++)
504 sr_pool
[orst
[i
]]->referrer
= m
->this;
506 m
->expr
= intdup (orst
);
510 if (R
[0] == RE_SPECIAL_OPARENTH
) {
511 int have_stor
= isstorp (R
[1]);
512 int skip1
= have_stor
|| R
[1] == RE_SPECIAL_GRANTED
;
513 int have_repet
= R
[j
= re_skip_parenthesis (R
, 1)] != 0;
515 for (i
= 1 + skip1
, k
= 0; i
< j
- 1;)
518 k
= make_subregulars (tmp
, -1);
519 if (have_stor
) sr_pool
[k
]->store_at
= R
[1] - RE_SPECIAL_STORP
;
520 sr_pool
[k
]->granted
= skip1
&& !have_stor
;
522 sr_pool
[k
]->referrer
= referrer
;
526 m
= mallocsubr (0, -1, 0, 0, referrer
);
528 set_repet (m
, &R
[j
]);
529 m
->from
= m
->min
* sr_pool
[k
]->from
;
530 m
->to
= min (regexp_INFTY
, m
->max
* sr_pool
[k
]->to
);
531 sr_pool
[k
]->referrer
= m
->this;
535 m
= mallocsubr (SB_TERM
, -1, 1, 1, referrer
);
540 mr
= mallocsubr (0, -1, 0, 0, referrer
);
541 m
->referrer
= mr
->this;
544 set_repet (mr
, &R
[1]);
545 mr
->from
= mr
->min
; mr
->to
= mr
->max
;
550 static void set_r_granted (subregular
*m
)
556 case SB_REPN
: set_r_granted (sr_pool
[m
->expr1
]);
559 for (i
= 0; i
< m
->n
; i
++)
560 set_r_granted (sr_pool
[m
->expr
[i
]]);
564 static void set_granted ()
569 for (i
= 2; i
< nsr_pool
; i
++)
570 if ((m
= sr_pool
[i
])->granted
) {
571 if (m
->from
!= m
->to
)
572 rerror ("Granted strings must be fixed size!");
578 // debugging. print the sr_pool[] through the stages
580 static void print_subregularz ()
585 PRINTF ("root=%i\n", root
);
586 for (i
= 0; i
< nsr_pool
; i
++) {
588 PRINTF ("%s%i(%i)[%i-%i]: ", m
->store_at
>= 0 ? COLS
"S"COLE
: "S", i
, m
->this, m
->from
, m
->to
);
591 case SB_NIL
: PRINTF (COLS
"-"COLE
"NIL-\n");
592 ncase
-1: PRINTF (COLS
"-"COLE
"1 (off)\n");
593 ncase SB_TERM
: if (!m
->strn
) {
596 if (c
>= ' ') PRINTF ("%c\n", c
);
597 else PRINTF (" ctrl[%i]\n", c
);
598 else if (c
== RE_SPECIAL_DOT
) PRINTF (COLS
"."COLE
"\n");
599 else if (c
== RE_SPECIAL_SSTART
) PRINTF (COLS
"^"COLE
"\n");
600 else if (c
== RE_SPECIAL_SEND
) PRINTF (COLS
"$"COLE
"\n");
601 else PRINTF ("class[%i]\n", c
- RE_SPECIAL_CLASS
);
603 PRINTF ("strncmp (\"%s\")\n", m
->strn
);
607 for (j
= 0; j
< n
; j
++)
608 PRINTF ("S%i %s", m
->expr
[j
], j
== n
-1 ? "" : COLS
"| "COLE
);
609 PRINTF (m
->swtbl
? COLS
" *switched*"COLE
"\n" : "\n");
612 for (j
= 0; j
< n
; j
++)
613 PRINTF ("S%i %s", m
->expr
[j
], j
== n
-1 ? "\n" : COLS
"+ "COLE
);
616 PRINTF ("S%i "COLS
"^"COLE
" (%i %s %i) ", m
->expr1
, m
->min
,
617 m
->type
== SB_REPN
? "->" : "<-", m
->max
);
618 if (m
->use_fail
) PRINTF (COLS
"*use fail%i* "COLE
, m
->use_fail
);
619 if (m
->fsckd
) PRINTF (COLS
"*fsckd*"COLE
"\n");
620 else if (m
->backtrack
>= 0)
621 PRINTF (COLS
"*backtrack %i!*"COLE
"\n", m
->backtrack
);
622 else PRINTF (COLS
"*unroll*"COLE
"\n");
625 PRINTF (" this=%i rstate ---> S%i\n", m
->this, m
->rstate
);
630 // silly optimizations
633 /*********************************************************
634 [1] FSCKD repetitioners - unroll recursion
636 Imagine this bad case /(fo|foo)*s/ against the string
637 "foofos". We can't unroll the recusrion: the fact that
638 it matches "fo" does not mean we can advance with a
639 positive match: "foo" can be matched if we don't advance
640 /(foo|fo)*s/ is OK on the other hand.
642 Anyway, if the repetitioner applies on fixed size we
643 are definitelly OK. If it is not fixed size and the
644 subexpression contains ORs or NON-GREEDY repetitioners,
645 suppose FSCKD. We could do futher analysis, but let's
646 suppose that repetitioners on non-fixed size *are* rare.
648 /(foo??)*s/ is FSCKD.
651 it has to do with subsets...
652 *********************************************************/
653 static int has_ors_or_temperate (subregular
*m
)
657 case SB_TERM
: return 0;
659 case SB_OR
: return 1;
660 case SB_REPG
: return has_ors_or_temperate (sr_pool
[m
->expr1
]);
664 for (i
= 0; i
< m
->n
; i
++)
665 if (has_ors_or_temperate (sr_pool
[m
->expr
[i
]]))
671 static void optimize_fsckd_repetitioners ()
676 for (i
= 2; i
< nsr_pool
; i
++) {
678 if (m
->type
!= SB_REPG
&& m
->type
!= SB_REPN
)
681 m2
= sr_pool
[m
->expr1
];
682 if (m2
->from
== m2
->to
)
684 m
->fsckd
= has_ors_or_temperate (m2
);
685 if (m
->type
== SB_REPN
686 && sr_pool
[m
->expr1
]->to
!= sr_pool
[m
->expr1
]->from
)
690 /*********************************************************
691 [2] NO BACKTRACK on greedy repetitioners
693 This is a very important optimization. Suppose regexp
694 /a*@/ and the string "aaaaaaab". Once the search for
695 'a's stops at 'b', there's no need to backtrack hoping
696 for a less greedy match: the fact that so far we matched
697 [a] means that there's no way we can match [@]. The opt
698 is important because such constructs are very frequent.
700 Another interesting case is this: /\w+\d\d\d@/
701 where we do not backtrack more than 3. This is checked
702 only if the base of the repetitioner is one character and
703 as long as it is followed by fixed subexpressions.
704 Otherwise it gets very complicated.
706 In the case C1* C2 C3 C4, where C4 and C1 have no common
707 characters, we also check C2-C4 and C3-C4. Normally
708 we backtrack no more than 2. If C2 and C4 have no common
709 and C3 and C4 have no common we backtrack no more than 0!
710 *********************************************************/
711 static subReg
next_subreg_norep (subReg s
)
717 s
= sr_pool
[p
= s
]->referrer
;
718 if (s
== -1) return -1;
720 if (m
->type
== SB_REPG
|| m
->type
== SB_REPN
)
722 if (m
->type
== SB_OR
) continue;
723 if (m
->type
!= SB_AND
) rerror ("bugs");
724 for (i
= 0; m
->expr
[i
] != p
; i
++);
725 if (m
->expr
[i
+ 1] != -1)
726 return m
->expr
[i
+ 1];
730 static void set_class_from_term (char_class C
, int c
)
734 if (!regexp_case
&& c
< 127 && isalpha (c
))
735 C
[toupper (c
)] = C
[tolower (c
)] = '1';
737 else if (c
== RE_SPECIAL_DOT
)
738 for (i
= 1; i
< 256; i
++)
741 c
= 1 << (c
- RE_SPECIAL_CLASS
);
742 for (i
= 0; i
< 258; i
++)
748 static void get_first_class (subReg s
, char_class C
)
751 memset (C
, '1', 258);
755 subregular
*m
= sr_pool
[s
];
759 case SB_REPN
: return get_first_class (m
->expr1
, C
);
760 case SB_AND
: return get_first_class (m
->expr
[0], C
);
761 case SB_NIL
: for (i
= 0; i
< 258; i
++) C
[i
] = '1';
762 ncase SB_TERM
: set_class_from_term (C
, m
->expr1
);
764 for (i
= 0; i
< m
->n
; i
++)
765 get_first_class (m
->expr
[i
], C
);
769 static int no_common (char_class C1
, char_class C2
)
772 for (i
= 0; i
< 258; i
++)
773 if (C2
[i
] == '1' && C1
[i
] == '1')
778 static int test_repetitioner1 (subregular
*m
)
780 char C1
[259], C2
[259];
781 memset (C1
, '0', 258); C1
[258] = 0;
782 memset (C2
, '0', 258); C2
[258] = 0;
784 get_first_class (m
->expr1
, C1
);
785 get_first_class (next_subreg_norep (m
->this), C2
);
786 return no_common (C1
, C2
);
789 static void get_nth_class_from_fixed (subregular
*m
, char_class C
, int n
)
794 j
= sr_pool
[m
->expr
[i
= 0]]->to
;
796 j
+= sr_pool
[m
->expr
[++i
]]->to
;
797 j
-= sr_pool
[m
->expr
[i
]]->to
;
798 get_nth_class_from_fixed (sr_pool
[m
->expr
[i
]], C
, n
- j
);
800 for (i
= 0; i
< m
->n
; i
++)
801 get_nth_class_from_fixed (sr_pool
[m
->expr
[i
]], C
, n
);
803 set_class_from_term (C
, m
->expr1
);
804 ncase SB_NIL
: for (i
= 0; i
< 258; i
++) C
[i
] = '1';
808 static void test_repetitioner2 (subregular
*m
)
811 subReg
this = m
->this, s
= m
->referrer
;
816 if (m
->type
!= SB_AND
) return;
817 for (i
= 0; m
->expr
[i
] != this; i
++);
818 for (j
= 0, k
= ++i
; i
< m
->n
; i
++) {
819 m2
= sr_pool
[m
->expr
[i
]];
820 if (m2
->from
!= m2
->to
) break;
825 char C1
[259], C2
[259];
826 memset (C1
, '0', 258); C1
[258] = 0;
827 get_first_class (sr_pool
[this]->expr1
, C1
);
829 for (j
= 0, i
= p
= k
; i
< m
->n
; i
++) {
830 m2
= sr_pool
[m
->expr
[i
]];
831 for (k
= 0; k
< m2
->to
; k
++, j
++) {
832 memset (C2
, '0', 258); C2
[258] = 0;
833 get_nth_class_from_fixed (m2
, C2
, k
);
834 if (no_common (C1
, C2
))
840 //PRINTF ("Backtrack distance = %i\n", j);
842 for (i2
= p
; i2
<= i
; i2
++) {
843 m2
= sr_pool
[m
->expr
[i2
]];
844 mto
= (i2
== i
) ? k
: m2
->to
- 1;
845 for (k2
= 0; k2
<= mto
; k2
++) {
846 memset (C1
, '0', 258); C1
[258] = 0;
847 get_nth_class_from_fixed (m2
, C1
, k2
);
848 if (no_common (C1
, C2
)) --j
;
853 //PRINTF ("Reduced to %i...\n", j);
854 sr_pool
[this]->backtrack
= j
;
857 static void optimize_backtracking_repetitioners ()
862 for (i
= 2; i
< nsr_pool
; i
++) {
864 if (m
->type
!= SB_REPG
)
867 if (test_repetitioner1 (m
))
869 else if (sr_pool
[m
->expr1
]->type
== SB_TERM
)
870 test_repetitioner2 (m
);
873 /*********************************************************
876 If a non-greedy repetitioner is followed by another
877 repetitioner, drop complexity in failure. For example
878 /.*?\w*?@/ and the text "abcdef1234,". Once the second
879 repetitioner (\w*?) stops at ',', the first one will
880 jump at it the next time: there is no need to go through
881 "bcdef1234" because the second repetitioner will always
882 halt at the same point.
884 This sounds rare but we make all regular expressions
885 fixed at the start by adding /^.*?/ in front of them.
886 So it is quite common and very useful so we won't shift
887 more than we have to. The optimization takes place
888 only when the repetitioner is on a character class and as
889 long as it is followed by character classes, up to the
890 first repetitioner. For example /^.*?abcd\w+/
891 will do it. It is interesting if the second repetitioner
892 has a big 'max' and possibly many match characters:
893 if it will walk far. An unfortunate case is this
894 /.*?ab?c\w+/ the (b?) repetitioner is too small for
895 us to activate the optimization and yet stops us
896 from proceeding up to the (\w+) which is interesting.
898 We can do more analysis and even pass it to OR's in
899 the future. For example: /.*?(bat|bee|fly)\w+/ ....
900 *********************************************************/
901 static void make_fixed ()
903 // make the regular expression fixed to start if
904 // possible by adding /^.*?/ in front of it.
905 // we can't do that if '^' already exists.
906 // a pathological case is (^a|bc)
908 memset (C
, '0', 258); C
[258] = 0;
909 get_first_class (root
, C
);
910 if (C
[RE_SPECIAL_SSTART
] == '1')
913 subregular
*m
= sr_pool
[root
];
914 subregular
*st
= mallocsubr (SB_TERM
, -1, 1, 1, 00);
915 subregular
*dt
= mallocsubr (SB_TERM
, -1, 1, 1, 00);
916 subregular
*ng
= mallocsubr (SB_REPN
, -1, 0, regexp_INFTY
, 00);
917 subregular
*nr
= mallocsubr (SB_AND
, -1, m
->from
, m
->to
, -1);
919 st
->expr1
= RE_SPECIAL_SSTART
;
920 st
->referrer
= nr
->this;
921 dt
->expr1
= RE_SPECIAL_DOT
;
922 dt
->referrer
= ng
->this;
923 ng
->expr1
= dt
->this;
924 ng
->referrer
= nr
->this;
927 ng
->max
= regexp_INFTY
;
928 m
->referrer
= nr
->this;
930 subReg tmp
[] = { st
->this, ng
->this, m
->this, -1 };
931 nr
->expr
= intdup (tmp
);
935 static subReg
descend_to_repterm (subReg s
)
937 subregular
*m
= sr_pool
[s
];
941 case SB_NIL
: return -1;
942 case SB_AND
: return descend_to_repterm (m
->expr
[0]);
943 default: return sr_pool
[m
->expr1
]->type
== SB_TERM
?
944 // do if repetitioner max > 4 walk far
945 m
->max
>= 4 ? m
->this : -1 : -1;
949 static int get_term_sequence (subregular
*m
, int st
, subReg seq
[])
953 for (i
= st
; i
< m
->n
; i
++) {
954 r
= sr_pool
[m
->expr
[i
]];
956 default: if (sr_pool
[r
->expr1
]->type
== SB_TERM
)
959 case SB_NIL
: seq
[n
] = -1; return 0;
961 case SB_TERM
: seq
[n
++] = r
->this;
962 ncase SB_AND
: if (!get_term_sequence (r
, 0, seq
+ n
))
964 while (seq
[n
] != -1) n
++;
971 static void distant_jmp (subregular
*m
)
975 subregular
*a
= sr_pool
[m
->referrer
];
976 if (a
->type
!= SB_AND
) return;
977 for (j
= 0; a
->expr
[j
] != m
->this; j
++);
978 get_term_sequence (a
, j
+ 1, sq
);
980 for (i
= 0; sq
[i
] != -1; i
++)
981 if (in2 (sr_pool
[sq
[i
]]->type
, SB_REPG
, SB_REPN
))
983 if (sq
[i
] == -1) return;
984 /* if too small / compared to how far, abort */
985 if (sr_pool
[sq
[i
]]->max
- i
< 4) return;
986 for (j
= 0; j
< i
+ 1; j
++)
987 sr_pool
[sq
[j
]]->save_fail
= sq
[i
];
988 m
->use_fail
= sq
[i
];
991 static void nongreedy2 (subregular
*m
, subReg s
)
994 char C1
[259], C2
[259];
995 memset (C1
, '0', 258); C1
[258] = 0;
996 memset (C2
, '0', 258); C2
[258] = 0;
998 get_first_class (m
->expr1
, C1
);
999 get_first_class (sr_pool
[s
]->expr1
, C2
);
1000 for (i
= c
= 0; i
< 258; i
++)
1006 sr_pool
[s
]->save_fail
= s
;
1009 static void optimize_nongreedy_jump ()
1016 for (i
= 2; i
< nsr_pool
; i
++) {
1018 if (m
->type
!= SB_REPN
|| m
->fsckd
)
1020 if (sr_pool
[m
->expr1
]->type
!= SB_TERM
)
1022 s
= next_subreg_norep (m
->this);
1023 if (s
== -1 || (s
= descend_to_repterm (s
)) == -1)
1025 else nongreedy2 (m
, s
);
1028 /*********************************************************
1031 Convert repetitioners with non-zero minimum times to
1032 repetitioners with zero minimum by expanding *min times
1033 the subexpression. For example /(f+)/ becomes /(ff*)/.
1035 We'll only do this for '+' and if we have 0 backtrack
1036 from optimization [2]. This is excellent because we don't
1038 *********************************************************/
1039 static subregular
*clone_copy (subregular
*m
)
1041 subregular
*n
= (subregular
*) malloc (sizeof *n
);
1043 return sr_pool
[n
->this = nsr_pool
++] = n
;
1046 static subReg
clone_subReg (subReg s
, subReg referrer
)
1049 subregular
*m
= sr_pool
[s
];
1050 subregular
*n
= clone_copy (m
);
1052 n
->referrer
= referrer
;
1055 case SB_REPG
: n
->expr1
= clone_subReg (m
->expr1
, n
->this);
1057 case SB_OR
: for (i
= 0; i
< n
->n
; i
++)
1058 n
->expr
[i
] = clone_subReg (n
->expr
[i
], n
->this);
1063 static void optimize_zeromin ()
1065 int i
, all
= nsr_pool
;
1068 for (i
= 2; i
< all
; i
++) {
1069 if (!in2 ((m
= sr_pool
[i
])->type
, SB_REPG
, SB_REPN
))
1071 if (m
->min
== 1 && m
->max
== regexp_INFTY
) {
1072 subregular
*r
= clone_copy (m
);
1073 r
->from
= r
->min
= 0;
1075 r
->referrer
= m
->this;
1076 subReg tmp
[] = { clone_subReg (r
->expr1
, m
->this), r
->this, -1 };
1079 m
->expr
= intdup (tmp
);
1081 sr_pool
[tmp
[0]]->save_fail
= r
->save_fail
;
1085 /*********************************************************
1088 For OR expressions with more than 3 operands use a
1089 switch in C. For example /(Mon|Tue|Web|Thu|Fri|Sat|Sun)/
1090 will greatly benefit.
1092 TODO: non-fixed with no repetitioners is OK /cat|bird|penguin/
1093 *********************************************************/
1094 #define SWITCH_THRESHOLD 3
1096 static void rmv_first_class (subReg s
)
1099 subregular
*m
= sr_pool
[s
];
1103 if (sr_pool
[m
->expr
[0]]->type
== SB_TERM
) {
1104 sr_pool
[m
->expr
[0]]->type
= -1;
1105 for (i
= 0; i
< m
->n
; i
++) {
1106 m
->expr
[i
] = m
->expr
[i
+ 1];
1109 } else rmv_first_class (m
->expr
[0]);
1112 ncase SB_TERM
: m
->type
= SB_NIL
; m
->from
= m
->to
= 0;
1114 for (i
= 0; i
< m
->n
; i
++)
1115 rmv_first_class (m
->expr
[i
]);
1120 static void optimize_switch (subregular
*m
)
1124 long long SW
[258], v
;
1126 if (m
->n
> 64) return; // not impl
1128 memset (SW
, 0, sizeof SW
);
1129 for (i
= 0; i
< m
->n
; i
++) {
1130 memset (C
, '0', 258); C
[258] = 0;
1131 get_first_class (m
->expr
[i
], C
);
1132 rmv_first_class (m
->expr
[i
]);
1133 for (v
= 1 << i
, j
= 0; j
< 258; j
++)
1137 memcpy (m
->swtbl
= (long long*) malloc (sizeof SW
), SW
, sizeof SW
);
1140 static void optimize_switches ()
1145 for (i
= 2; i
< nsr_pool
; i
++) {
1147 if (m
->type
!= SB_OR
|| m
->n
<= SWITCH_THRESHOLD
|| m
->from
!= m
->to
)
1149 for (j
= 0; j
< m
->n
; j
++)
1150 if (!sr_pool
[m
->expr
[j
]]->to
)
1152 optimize_switch (m
);
1156 /*********************************************************
1159 For the case our regular expression starts with the ^.*?
1160 and the minimum length of the string in order to have
1161 a positive match *is* accountable (non neligible), we
1162 set a threshold to avoid studying strings which are
1163 too small to produce a match anyway.
1164 *********************************************************/
1167 static Token R_minlen
;
1168 static void optimize_minlen ()
1171 subregular
*m
= sr_pool
[root
], *n
;
1173 if (regexp_packed
) return;
1174 if (m
->type
!= SB_AND
) return;
1175 n
= sr_pool
[m
->expr
[0]];
1176 if (n
->type
!= SB_TERM
|| n
->expr1
!= RE_SPECIAL_SSTART
) return;
1177 n
= sr_pool
[m
->expr
[1]];
1178 if (n
->type
!= SB_REPN
) return;
1179 n
= sr_pool
[n
->expr1
];
1180 if (n
->type
!= SB_TERM
) return;
1181 for (j
= 0, i
= 2; i
< m
->n
; i
++)
1182 j
+= sr_pool
[m
->expr
[i
]]->from
;
1185 sr_pool
[m
->expr
[1]]->usemin
= 1;
1188 /*********************************************************
1189 [7] use strncmp/strncasecmp
1191 For long fixed strings, the code gets smaller and
1192 this is good for cache.
1193 *********************************************************/
1194 static int isplain (subReg s
, int i
)
1196 subregular
*m
= sr_pool
[sr_pool
[s
]->expr
[i
]];
1197 return m
->type
== SB_TERM
&& m
->expr1
< 255 && !m
->granted
;
1200 static void dostrfunc (subReg s
, int f
, int t
)
1203 subregular
*m
= sr_pool
[s
], *m2
;
1206 for (i
= 0; f
+ i
<= t
; i
++)
1207 tmp
[i
] = sr_pool
[m
->expr
[f
+ i
]]->expr1
;
1209 m2
= sr_pool
[m
->expr
[f
]];
1210 m2
->strn
= strdup (tmp
);
1211 m2
->from
= m2
->to
= strlen (tmp
);
1212 for (i
= 1; t
+ i
<= m
->n
; i
++)
1213 m
->expr
[f
+ i
] = m
->expr
[t
+ i
];
1217 #define DOSTR_ABOVE 2
1219 static void optimize_strfuncs ()
1224 for (i
= 2; i
< nsr_pool
; i
++)
1225 if ((m
= sr_pool
[i
])->type
== SB_AND
)
1226 for (j
= 0; j
< m
->n
;) {
1227 while (j
< m
->n
&& !isplain (i
, j
)) j
++;
1230 for (k
= j
; k
< m
->n
&& isplain (i
, k
); k
++);
1231 if (k
- j
> DOSTR_ABOVE
)
1232 dostrfunc (i
, j
, k
- 1);
1236 /*++++++++++++++++++++++++++++++++++++++++++++++++++
1237 ++++++++++++++++++++++++++++++++++++++++++++++++++*/
1238 /*********************************************************
1239 Do all optimizations in the correct order
1240 *********************************************************/
1241 void print_subregularz ();
1242 static void optimize_regexp ()
1246 if (debugflag.REGEXP_DEBUG) {\
1247 print_subregularz ();\
1254 // must be first. we don't do the others on fsckd
1256 STATUS ("\nOptimize: set fsckd repetitioners:\n")
1257 optimize_fsckd_repetitioners ();
1260 // most important optimization
1262 STATUS ("\nOptimize. Backtracks:\n")
1263 optimize_backtracking_repetitioners ();
1266 // make it fixed ^ if possible. root changes
1268 STATUS ("\nOptimize. Non-greedy jumps:\n")
1269 optimize_nongreedy_jump ();
1273 // must be after above. others are not prepared
1274 // for an SB_OR subregular with a swtbl
1276 STATUS ("\nOptimize. Switch:\n")
1277 optimize_switches ();
1280 // put it here to avoid zeromin on
1281 // subexpressions implemented with switch ()
1283 STATUS ("\nOptimize. zeromin:\n")
1284 optimize_zeromin ();
1287 // use strncmp() and strncasecmp() for fixed sequences
1288 if (regexp_strfuncs
) {
1289 STATUS ("\nOptimize strfuncs():\n");
1290 optimize_strfuncs ();
1298 /*********************************************************
1299 finalize all the info in the sr_pool[]
1300 *********************************************************/
1305 // disable extractors inside repetitioners
1307 static subReg
*e_off
;
1310 static void disable_extractors (subregular
*m
)
1313 if (m
->store_at
>= 0) {
1314 e_off
[n_e_off
++] = m
->store_at
;
1319 case SB_REPN
: disable_extractors (sr_pool
[m
->expr1
]);
1322 for (i
= 0; i
< m
->n
; i
++)
1323 disable_extractors (sr_pool
[m
->expr
[i
]]);
1327 static void disable_repet_extracts ()
1331 e_off
= (subReg
*) alloca (nsr_pool
* sizeof *e_off
);
1333 for (i
= 0; i
< nsr_pool
; i
++)
1334 if (in2 (sr_pool
[i
]->type
, SB_REPG
, SB_REPN
))
1335 disable_extractors (sr_pool
[sr_pool
[i
]->expr1
]);
1336 if (!n_e_off
) return;
1337 for (i
= 0; i
< nsr_pool
; i
++)
1338 if ((l
= sr_pool
[i
]->store_at
) >= 0) {
1339 for (j
= k
= 0; j
< n_e_off
; j
++)
1340 if (l
> e_off
[j
]) ++k
;
1341 sr_pool
[i
]->store_at
-= k
;
1347 // possible badly set matchers: (cat)|(dog)|(baboon)
1354 } extract_point
[64];
1356 static bool ambiguous_extracts
;
1358 static void ambig_store (subregular
*m
)
1360 if (m
->store_at
>= 0)
1361 ambiguous_extracts
= extract_point
[m
->store_at
].ambig
= 1;
1362 if (in2 (m
->type
, SB_OR
, SB_AND
)) {
1364 for (i
= 0; i
< m
->n
; i
++)
1365 ambig_store (sr_pool
[m
->expr
[i
]]);
1369 static void check_match_in_or ()
1373 ambiguous_extracts
= 0;
1374 for (i
= 0; i
< nsr_pool
; i
++)
1375 if ((m
= sr_pool
[i
])->type
== SB_OR
)
1376 for (j
= 0; j
< m
->n
; j
++)
1377 ambig_store (sr_pool
[m
->expr
[j
]]);
1380 // connect return states and mark extractor points
1383 static void rstate_connect (subReg s
, subReg rstate
)
1386 subregular
*m
= sr_pool
[s
], *m1
;
1387 if (m
->store_at
>= 0) {
1388 int fixedlen
= m
->from
== m
->to
? m
->to
: -1;
1389 extract_point
[m
->store_at
].ss
= m
->this;
1390 extract_point
[m
->store_at
].ambig
= 0;
1391 m
->save_success
= 1;
1392 if ((extract_point
[m
->store_at
].fixedlen
= fixedlen
) == -1) {
1393 if (!rstate
) rstate
= 1;
1394 extract_point
[m
->store_at
].se
= rstate
;
1395 sr_pool
[rstate
]->save_success
= 1;
1403 for (i
= 0; i
< m
->n
- 1; i
++)
1404 rstate_connect (m
->expr
[i
], m
->expr
[i
+ 1]);
1405 rstate_connect (m
->expr
[i
], rstate
);
1406 m
->rstate
= m
->expr
[0];
1408 for (i
= 0; i
< m
->n
; i
++)
1409 rstate_connect (m
->expr
[i
], rstate
);
1412 m1
= sr_pool
[m
->expr1
];
1414 rstate_connect (m
->expr1
, m
->store_at
>= 0 ? s
+ OBV
: s
);
1416 rerror ("can match infinite times the empty string anywhere!");
1417 } else rstate_connect (m
->expr1
, m
->store_at
>= 0 || m1
->from
!= m1
->to
);
1421 static bool fixed_start
;
1422 static void do_fixed_start ()
1424 subregular
*m1
= sr_pool
[root
], *m2
;
1426 if (m1
->type
== SB_AND
) {
1427 m2
= sr_pool
[m1
->expr
[0]];
1428 if (m2
->type
== SB_TERM
&& m2
->expr1
== RE_SPECIAL_SSTART
) {
1430 intcpy (m1
->expr
, m1
->expr
+ 1);
1438 // do everything we need for the code generator
1440 static void prepare_regexp (char *re
)
1448 // escape and set RE_SPECIAL values
1449 regexp_descape (re
, r
);
1451 // compile classes and install class numbers
1452 compile_classes (r
);
1455 mallocsubr (SB_NIL
, -1, 0, 0, -1);
1456 mallocsubr (SB_NIL
, -1, 0, 0, -1);
1457 root
= make_subregulars (r
, -1);
1460 // optimize sr_pool []
1463 // connect rstates for the code generator
1465 disable_repet_extracts ();
1466 rstate_connect (root
, 0);
1467 check_match_in_or ();
1469 if (debugflag
.REGEXP_DEBUG
) {
1470 PRINTF ("\nrstates:\n");
1471 print_subregularz ();
1475 //######################################################################
1477 //######################################################################
1480 // everything we need is in the subregulars sr_pool[]
1481 //######################################################################
1482 static Token
catRE (char*);
1483 static Token
state_var(char*,int),state_func(int),new_value_char(int),new_value_hex(int), new_value_eint(int);
1484 static Token R_ctbl
, R_name
;
1485 static Token RGType
= RESERVED_char
;
1486 #define BRING(x) Token x = RESERVED_ ## x;
1487 #define XCODE(...) { Token xcd [] = { __VA_ARGS__, -1 }; outprintf (REGEXP, ISTR (xcd), -1); }
1488 #define IF(...) if (__VA_ARGS__) {
1489 #define ELIF(...) } else if (__VA_ARGS__) {
1490 #define ELSE } else {
1493 #define PROTO(x) R (static), R (inline), R (int), state_func (x), '(', R (const), \
1494 R (unsigned), RGType, '*', a, ')'
1495 #define RETURN0 R (return), R (0)
1496 #define RETURN1 R (return), R (1)
1497 #define JMPVAR(n) state_var ("j", n)
1498 #define STORVAR(n) state_var ("s", n)
1499 #define DECLAREJMP(x,n) outprintf (REGEXPD, R (static), R (const), R (unsigned), RGType, '*', x = JMPVAR (n), ';', -1);
1500 #define DECLARESTOR(x,n) outprintf (REGEXPD, R (static), R (const), R (unsigned), RGType, '*', x = STORVAR(n), ';', -1);
1501 #define STATEF(x) state_func (x), '(', a, ')'
1502 #define STATEFA1(x) state_func (x), '(', a, '+', RESERVED_1, ')'
1504 #define COMPOUND(...) '{', __VA_ARGS__, '}'
1506 static OUTSTREAM REGEXP
, REGEXPD
;
1507 static bool ctbl_used
;
1509 static void gen_end_state (subregular
*);
1510 static void gen_action_state (subregular
*);
1511 static void gen_or_state (subregular
*);
1512 static void gen_switch_state (subregular
*);
1513 static void gen_greedy_state (subregular
*);
1514 static void gen_nongreedy_state (subregular
*);
1515 static void gen_nongreedy_loop_state (subregular
*);
1516 static void gen_greedy_loop_state (subregular
*);
1517 static void gen_save_interstate (subregular
*);
1518 static void proto (subReg
);
1520 static void gen_state (subReg s
)
1522 if (s
<= 1 || s
>= OBV
) return;
1523 subregular
*m
= sr_pool
[s
];
1525 if (m
->gst
== 2) return;
1526 if (m
->gst
== 1) { proto (m
->this); return; }
1528 if (m
->save_success
)
1529 gen_save_interstate (m
);
1532 case SB_NIL
: gen_end_state (m
);
1533 ncase SB_TERM
: gen_action_state (m
);
1534 ncase SB_OR
: m
->swtbl
? gen_switch_state (m
) : gen_or_state (m
);
1535 ncase SB_REPG
: m
->fsckd
? gen_greedy_state (m
) : gen_greedy_loop_state (m
);
1536 ncase SB_REPN
: m
->fsckd
? gen_nongreedy_state (m
) : gen_nongreedy_loop_state (m
);
1542 static void gen_regexp_code ()
1547 R_minlen
= catRE ("mb");
1548 outprintf (REGEXPD
, R (static), R (const), R (unsigned), RGType
, '*',
1552 XCODE (PROTO(0), '{', RETURN1
, ';', '}')
1554 XCODE (PROTO(1), '{', STORVAR (1), '=', a
, ';', RETURN1
, ';', '}')
1561 static void gen_save_interstate (subregular
*m
)
1564 int ist
= m
->this + OBV
;
1567 XCODE (PROTO (ist
), ';')
1568 DECLARESTOR(s
, m
->this);
1569 XCODE (PROTO (m
->this), '{', R (if), '(', STATEF (ist
), ')', '{', s
, '=', a
, ';')
1570 XCODE (RETURN1
, ';', '}', RETURN0
, ';', '}')
1574 static void proto (subReg s
)
1576 if (s
> 0 && (s
>= OBV
|| !sr_pool
[s
]->proton
)) {
1578 outprintf (REGEXPD
, PROTO (s
), ';', -1);
1579 sr_pool
[s
]->proton
= 1;
1583 //////////////////////////////////////////////////////////
1584 // each of these 7 functions generates a state function //
1585 //////////////////////////////////////////////////////////
1587 //--------------------------------------------
1588 // greedy repetitioner -- unroll
1589 //--------------------------------------------
1590 static void gen_greedy_loop_state (subregular
*m
)
1592 BRING (a
) BRING (x
) BRING (i
) BRING (X
)
1593 int min
= m
->min
, max
= m
->max
;
1594 bool havemin
= min
!= 0, havemax
= max
!= regexp_INFTY
, havetimes
= havemin
|| havemax
;
1595 bool fixed
= sr_pool
[m
->expr1
]->from
== sr_pool
[m
->expr1
]->to
;
1596 Token Nmin
= new_value_int (min
), Nmax
= new_value_int (max
);
1597 Token Nfix
= new_value_int (sr_pool
[m
->expr1
]->from
);
1599 bool save_displ
= m
->backtrack
!= 0 && !fixed
;
1600 bool unlimited
= regexp_INFTY
> regexp_partition
&& max
== regexp_INFTY
&& save_displ
;
1601 bool xused
= havetimes
|| unlimited
|| m
->backtrack
!= 0;
1603 havetimes
|= unlimited
;
1605 gen_state (m
->expr1
); gen_state (m
->rstate
);
1608 DECLAREJMP (j
, m
->save_fail
)
1610 XCODE (PROTO (m
->this), '{')
1611 IF (xused
) // if count times
1612 XCODE (R (int), x
, '=', R (0), ';')
1614 IF (save_displ
) // if saving displacement for non-fixed with backtracking
1615 int loopmax
= havemax
? max
: regexp_partition
;
1616 XCODE (RGType
, X
, '[', new_value_int (loopmax
), ']', ';', X
, '[', R (0), ']', '=', a
, ';')
1618 XCODE (R (for), '(', ';', ';', ')', '{', R (if), '(', STATEF (m
->expr1
), ')', '{')
1620 /////// if macth subexpr
1621 IF (fixed
) // increase a by fixed displacement
1622 XCODE (a
, ASSIGNA
, Nfix
, ';')
1623 ELSE
// or by state1 success
1624 XCODE (a
, '=', STORVAR (1), ';')
1626 IF (xused
) // if counting times, times++
1627 XCODE (PLUSPLUS
, x
, ';')
1630 XCODE (X
, '[', x
, ']', '=', a
, ';')
1633 IF (max
> 1) // if the repetitioner is '?' don't go for more
1634 IF (havemax
) // no more than maximum
1635 XCODE (R (if), '(', x
, '<', Nmax
, ')')
1636 ELIF (unlimited
) // and no more than the sizeof the displacement tbl
1637 XCODE (R (if), '(', x
, '<', new_value_int (regexp_partition
), ')')
1639 XCODE (R (continue), ';')
1642 IF (unlimited
) // recursion because sizeof displacement tbl exceeded
1643 XCODE (R (if), '(', STATEF (m
->this), ')', RETURN1
, ';')
1645 XCODE ('}', R (break), ';', '}')
1647 IF (m
->save_fail
) // save maximum match if have to
1648 XCODE (j
, '=', a
, ';')
1651 IF (m
->backtrack
< 0) // if not backtracking loop down to minimum
1652 XCODE (R (while), '(', x
, MINUSMINUS
, GEQCMP
, Nmin
, ')')
1653 ELIF (m
->backtrack
> 0) // or down to minimum but no more than 'backtrack' value (>0)
1654 XCODE (R (int), i
, '=', new_value_int (m
->backtrack
+ 1), ';')
1655 XCODE (R (while), '(', x
, MINUSMINUS
, GEQCMP
, Nmin
, ANDAND
, i
, MINUSMINUS
, ')')
1657 IF (!m
->backtrack
&& havemin
) // if just once (no loop) ensure >= min
1658 XCODE (R (if), '(', x
, GEQCMP
, Nmin
, ANDAND
, state_func (m
->rstate
), '(')
1659 ELSE
// ... otherwise, it is definitelly >= min
1660 XCODE (R (if), '(', state_func (m
->rstate
), '(')
1662 IF (fixed
) // if fixed pass 'a' to next-state
1663 XCODE (a
, ')', ')', RETURN1
, ';')
1664 IF (m
->backtrack
) // if looping, decrease 'a' by fixed displacement
1665 XCODE (R (else), a
, ASSIGNS
, Nfix
, ';')
1667 ELSE
// otherwise, if non fixed use the values from the displacement tbl
1668 XCODE (X
, '[', x
, '+', R (1), ']', ')', ')', RETURN1
, ';')
1670 XCODE (RETURN0
, ';', '}')
1673 //--------------------------------------------
1674 // non greedy repetitioner -- unroll
1675 //--------------------------------------------
1676 static void gen_nongreedy_loop_state (subregular
*m
)
1679 int min
= m
->min
, max
= m
->max
;
1680 bool havemin
= min
!= 0, havemax
= max
!= regexp_INFTY
, havetimes
= havemin
|| havemax
;
1681 bool fixed
= sr_pool
[m
->expr1
]->from
== sr_pool
[m
->expr1
]->to
;
1682 Token Nmin
= new_value_int (min
), Nmax
= new_value_int (max
);
1683 Token Nfix
= new_value_int (sr_pool
[m
->expr1
]->to
);
1686 gen_state (m
->expr1
); gen_state (m
->rstate
);
1688 XCODE (PROTO (m
->this), '{')
1689 IF (havetimes
) // we are counting times
1690 XCODE (R (int), x
, '=', R (0), ';')
1692 XCODE (R (for), '(', ';', ';', ')', '{')
1694 XCODE (R (if), '(', a
, '>', R_minlen
, ')', RETURN0
, ';')
1696 ////////// early match
1697 IF (havemin
) // ok if more than minimum
1698 XCODE (R (if), '(', x
, GEQCMP
, Nmin
, ')', '{')
1700 XCODE (R (if), '(', STATEF (m
->rstate
), ')', RETURN1
, ';')
1701 IF (m
->use_fail
) // jump after failuer of next state
1703 XCODE (x
, ASSIGNA
, JMPVAR (m
->use_fail
), '-', a
, ';')
1705 XCODE (a
, '=', JMPVAR (m
->use_fail
), ';')
1711 XCODE (R (if), '(', '!', '(')
1712 IF (havemax
) // if beyond maximum, fail
1713 XCODE (x
, '<', Nmax
, ANDAND
)
1715 XCODE (STATEF (m
->expr1
), ')', ')')
1716 IF (m
->save_fail
) // in failure case save it as maximum walk
1717 DECLAREJMP (j
, m
->save_fail
)
1718 XCODE (COMPOUND (j
, '=', a
, ';', RETURN0
, ';'))
1719 ELSE
// ... or just return 0
1720 XCODE (RETURN0
, ';')
1722 IF (fixed
) // increase 'a' by fixed displacement
1723 XCODE (a
, ASSIGNA
, Nfix
, ';')
1725 /* XXXXXX non fixed */
1726 /* not implemented, so we mark non-greedy on non-fixed fsckd. rare anyway */
1728 IF (havetimes
) // if counting times at all, increment
1729 XCODE (PLUSPLUS
, x
, ';')
1733 //--------------------------------------------
1734 // non greedy repetitioner -- recursive
1735 //--------------------------------------------
1736 static void gen_nongreedy_state (subregular
*m
)
1739 int min
= m
->min
, max
= m
->max
;
1740 bool havemin
= min
!= 0, havemax
= max
!= regexp_INFTY
, havetimes
= havemin
|| havemax
;
1741 Token Nmin
= new_value_int (min
), Nmax
= new_value_int (max
);
1743 gen_state (m
->expr1
); gen_state (m
->rstate
);
1745 XCODE (PROTO (m
->this), '{')
1747 XCODE (R (static), R (int), x
, ';')
1750 XCODE (R (if), '(', x
, GEQCMP
, Nmin
, ')')
1752 XCODE (R (if), '(', STATEF (m
->rstate
), ')', '{')
1754 XCODE (x
, '=', R (0), ';')
1756 XCODE (RETURN1
, ';', '}')
1758 XCODE (R (if), '(', x
, '>', Nmax
, ')', COMPOUND (x
, '=', R (0), ';', RETURN0
, ';'))
1761 XCODE (PLUSPLUS
, x
, ';')
1763 XCODE (R (return), STATEF (m
->expr1
), ';', '}')
1766 //--------------------------------------------
1767 // greedy repetitioner -- recursive
1768 //--------------------------------------------
1769 static void gen_greedy_state (subregular
*m
)
1772 int min
= m
->min
, max
= m
->max
;
1773 bool havemin
= min
!= 0, havemax
= max
!= regexp_INFTY
, havetimes
= havemin
|| havemax
;
1774 Token Nmin
= new_value_int (min
), Nmax
= new_value_int (max
);
1776 gen_state (m
->expr1
); gen_state (m
->rstate
);
1778 XCODE (PROTO (m
->this), '{')
1780 XCODE (R (static), R (int), x
, ';')
1783 XCODE (R (if), '(', x
, '<', Nmax
, ')', '{')
1786 XCODE (PLUSPLUS
, x
, ';')
1788 XCODE (R (if), '(', STATEF (m
->expr1
), ')', '{')
1790 XCODE (MINUSMINUS
, x
, ';')
1792 XCODE (RETURN1
, ';', '}')
1794 XCODE (R (else), MINUSMINUS
, x
, ';')
1800 XCODE (R (if), '(', x
, '<', Nmin
, ')', RETURN0
, ';')
1802 XCODE (R (return), STATEF (m
->rstate
), ';', '}')
1805 //--------------------------------------------
1806 // "this or that" with a switch()
1807 //--------------------------------------------
1808 static void gen_switch_state (subregular
*m
)
1811 long long utbl
[258], v
;
1814 memcpy (utbl
, m
->swtbl
, sizeof utbl
);
1815 for (i
= 0; i
< 258; i
++) {
1816 if (!(v
= utbl
[i
])) continue;
1817 for (j
= i
; j
< 258; j
++)
1818 if (utbl
[j
] == v
) utbl
[j
] &= ~v
;
1819 for (h
= j
= 0; j
< 64; j
++)
1821 gen_state (m
->expr
[j
]);
1824 memcpy (utbl
, m
->swtbl
, sizeof utbl
);
1825 XCODE (PROTO (m
->this), '{', R (switch), '(', '*', a
, ')', '{')
1826 for (i
= 0; i
< 258; i
++) {
1827 if (!(v
= utbl
[i
])) continue;
1828 for (j
= i
; j
< 258; j
++)
1829 if (utbl
[j
] == v
) {
1830 XCODE (R (case), new_value_int (j
), ':')
1834 for (h
= j
= 0; j
< 64; j
++)
1835 if (v
& (1LL << j
)) {
1838 XCODE (STATEFA1 (m
->expr
[j
]))
1842 XCODE (R (default), ':', RETURN0
, ';', '}', '}')
1845 //--------------------------------------------
1846 // return "this or that"
1847 //--------------------------------------------
1848 static void gen_or_state (subregular
*m
)
1853 for (i
= 0; i
< m
->n
; i
++)
1854 gen_state (m
->expr
[i
]);
1856 XCODE (PROTO (m
->this), '{', R(return))
1857 for (i
= 0; i
< m
->n
; i
++)
1858 XCODE (STATEF (m
->expr
[i
]), i
== m
->n
- 1 ? ';' : OROR
)
1862 //--------------------------------------------
1863 // do actual check for a character match
1864 //--------------------------------------------
1865 static void gen_action_state (subregular
*m
)
1867 int atom
= m
->expr1
, b
, s
, i
, adv
= RESERVED_1
;
1870 gen_state (m
->rstate
);
1872 XCODE (PROTO (m
->this), '{')
1876 adv
= new_value_int (strlen (m
->strn
));
1877 XCODE (R (if), '(', '!', regexp_case
? INTERN_strncmp
: INTERN_strncasecmp
)
1878 XCODE ('(', a
, ',', new_value_string (m
->strn
), ',', adv
, ')')
1879 ELIF (atom
>= RE_SPECIAL_CLASS
&& (b
= count1s (atom
- RE_SPECIAL_CLASS
)) > 2 && b
< 253 && regexp_noctbl
)
1880 bool ones
= b
< 127;
1881 XCODE (R (int), x
, '=', ones
? R (0) : R (1), ';', R (switch), '(', PA
, ')')
1882 b
= 1 << (atom
- RE_SPECIAL_CLASS
);
1883 for (i
= 1; i
< 256; i
++)
1884 if (nz (ctbl
[i
] & b
) == ones
) {
1886 #ifdef CASE_RANGERS // the heroic squad who fights the nasty alien outlaws
1888 for (j
= i
+ 1, c
= 0; j
< 256; j
++)
1889 if (nz (ctbl
[j
] & b
) == ones
) ++c
;
1892 XCODE (new_value_eint (i
), ELLIPSIS
, new_value_eint (--j
), ':')
1896 XCODE (new_value_eint (i
), ':')
1898 XCODE (x
, '=', ones
? R (1) : R (0), ';', R (if), '(', x
);
1901 IF (atom
== RE_SPECIAL_DOT
)
1903 ELIF (atom
== RE_SPECIAL_SEND
)
1905 ELIF (atom
>= RE_SPECIAL_CLASS
)
1906 s
= count1s (b
= atom
- RE_SPECIAL_CLASS
);
1908 for (i
= 1; !(ctbl
[i
] & (1 << b
)); i
++);
1909 XCODE (PA
, EQCMP
, new_value_int (i
))
1911 for (++i
; !(ctbl
[i
] & (1 << b
)); i
++);
1912 XCODE (OROR
, PA
, EQCMP
, new_value_int (i
))
1915 rerror ("character class whith matches nothing!");
1917 for (i
= 1; ctbl
[i
] & ~(1 << b
); i
++);
1918 XCODE (PA
, NEQCMP
, new_value_int (i
))
1920 for (++i
; ctbl
[i
] & ~(1 << b
); i
++);
1921 XCODE (ANDAND
, PA
, NEQCMP
, new_value_int (i
))
1924 Token bit
= new_value_hex (1 << (atom
- RE_SPECIAL_CLASS
));
1926 XCODE (R_ctbl
, '[', PA
, ']', '&', bit
)
1927 ENDIF
/* class implemented with ctbl or eq/neq */
1928 ELIF (atom
< ' ' || atom
> '~')
1929 XCODE (PA
, EQCMP
, new_value_int (atom
))
1931 XCODE (PA
, EQCMP
, new_value_char (atom
))
1932 IF (!regexp_case
&& isalpha (atom
))
1933 XCODE (OROR
, PA
, EQCMP
, new_value_char (atom
< 'a' ? tolower (atom
) : toupper (atom
)))
1934 ENDIF
/* do lower/upper case */
1936 ENDIF
/* switch or if */
1938 ENDIF
/* !granted */
1941 IF (m
->save_fail
) // on the way to a jump saving repetitioner....
1942 XCODE (m
->granted
? BLANKT
: '{', R (if),'(', STATEFA1 (m
->rstate
), ')', RETURN1
, ';')
1943 XCODE (MINUSMINUS
, JMPVAR (m
->save_fail
), ';', RETURN0
, ';', m
->granted
? BLANKT
: '}')
1945 XCODE (JMPVAR (m
->save_fail
), '=', a
, ';')
1948 XCODE (R (return), state_func (m
->rstate
), '(', a
, '+', adv
, ')', ';')
1952 XCODE (RETURN0
, ';')
1957 //--------------------------------------------
1958 // redirect to other state
1959 //--------------------------------------------
1960 static void gen_end_state (subregular
*m
)
1963 gen_state (m
->rstate
);
1964 XCODE (PROTO (m
->this), '{', R (return), STATEF (m
->rstate
), ';', '}')
1966 //######################################################################
1968 //######################################################################
1970 static Token
catREsym (char *s
)
1973 sprintf (tmp
, "%s%s", expand (R_name
), s
);
1974 return enter_symbol (strdup (tmp
));
1976 static Token
catRE (char *s
)
1979 sprintf (tmp
, "%s%s", expand (R_name
), s
);
1980 return new_symbol (strdup (tmp
));
1982 static Token
state_var (char *s
, int i
)
1985 sprintf (tmp
, "%s%s%i", expand (R_name
), s
, i
);
1986 return new_symbol (strdup (tmp
));
1988 static Token
state_func (int i
)
1991 sprintf (tmp
, "%s_state%i", expand (R_name
), i
);
1992 return new_symbol (strdup (tmp
));
1994 static Token
new_value_char (int i
)
1997 sprintf (tmp
, "'%c'", i
);
1998 return new_symbol (strdup (tmp
));
2000 static Token
new_value_eint (int i
)
2003 if (i
< ' ' || i
> '~')
2004 sprintf (tmp
, "'\\x%x'", i
);
2006 sprintf (tmp
, "'%c'", i
);
2007 return new_symbol (strdup (tmp
));
2010 static Token
new_value_hex (int i
)
2013 sprintf (tmp
, "0x%x", i
);
2014 return new_symbol (strdup (tmp
));
2017 //######################################################################
2019 //######################################################################
2020 // Entries to this module, inits and globals
2021 //######################################################################
2023 bool istatic
, iextern
, iinline
;
2025 #define STATIC regexp_dclql.istatic ? R (static) : BLANKT
2026 #define EXTERN regexp_dclql.iextern ? R (extern) : BLANKT
2027 #define INLINE regexp_dclql.iinline ? R (inline) : BLANKT
2029 static void export_ctbl ()
2034 if (!nclasses
|| !ctbl_used
) return;
2036 rerror ("Too many character classes. Can do only 32");
2038 outprintf (REGEXPD
, R (const), R (static), !regexp_packed
? R (int)
2039 : nclasses
< 8 ? R (char) : nclasses
< 16 ? R (short)
2040 : R (int), R_ctbl
, '[', ']', '=', '{', -1);
2041 for (i
= 0; i
< 258; i
++) {
2042 sprintf (tmp
, "0x%x,", ctbl
[i
]);
2043 outprintf (REGEXPD
, new_symbol (strdup (tmp
)), -1);
2045 outprintf (REGEXPD
, '}', ';', -1);
2048 static typeID typeID_matchf
;
2049 static void export_once ()
2056 // declare and make known the charp_len structure which holds extracts
2059 outprintf (GLOBAL
, R (struct), R (charp_len
), '{', R (unsigned), R (char), '*',
2060 p
, ';', R (int), i
, ';', '}', ';', -1);
2061 recID rr
= enter_struct (RESERVED_charp_len
, true, 0, 0, 0, 0);
2062 add_variable_member (rr
, p
, typeID_charP
, 0, false, false);
2063 add_variable_member (rr
, i
, typeID_int
, 0, false, false);
2064 complete_structure (0, rr
);
2065 int p0
[] = { rr
, '*', -1 };
2066 int p1
[] = { B_SINT
, '(', typeID_charP
, enter_type (p0
), INTERNAL_ARGEND
, -1 };
2067 typeID_matchf
= enter_type (p1
);
2070 static void make_match_function (char *re
)
2072 BRING (a
) BRING (i
) BRING (X
) BRING (p
)
2074 Token f
= catRE ("_match"), n
;
2075 Token recipe
= catRE ("_recipe");
2078 XCODE (STATIC
, R (const), R (char), recipe
, '[', ']', '=',
2079 new_symbol (escape_c_string (re
, strlen (re
))), ';')
2081 XCODE (STATIC
, INLINE
, R (int), f
, '(', R (const), R (char), '*', a
, ',', R (struct))
2082 XCODE (R (charp_len
), X
, '[', ']', ')', '{')
2084 rerror ("Not prepared to handle this kind of regexp. Please move '^' out");
2087 XCODE (R_minlen
, '=', a
, '+', '(', R (strlen
), '(', a
, ')', '-')
2088 XCODE (new_value_int (minlen
), ')', ';')
2091 IF (ambiguous_extracts
)
2092 for (j
= 0; j
< nextr
; j
++)
2093 if (extract_point
[j
].ambig
)
2094 XCODE (STORVAR (extract_point
[j
].ss
), '=', R (0), ';')
2096 XCODE (R (if), '(', STATEF (root
), ')', '{', R (if), '(', X
, ')', '{')
2097 for (j
= 0; j
< nextr
; j
++) {
2098 #define UNCONST '(', R (unsigned), RGType, '*', ')'
2099 n
= new_value_int (j
);
2100 IF (extract_point
[j
].fixedlen
== -1)
2101 XCODE (X
, '[', n
, ']', '.', i
, '=', STORVAR (extract_point
[j
].se
), '-', '(')
2102 XCODE (X
, '[', n
, ']', '.', p
, '=', UNCONST
, STORVAR (extract_point
[j
].ss
),')',';')
2104 XCODE (X
, '[', n
, ']', '.', p
, '=', UNCONST
, STORVAR (extract_point
[j
].ss
), ';')
2105 XCODE (X
, '[', n
, ']', '.', i
, '=', new_value_int(extract_point
[j
].fixedlen
),';')
2108 XCODE ('}', RETURN1
, ';', '}', RETURN0
, ';', '}')
2110 XCODE (R (return), STATEF (root
), ';', '}')
2114 static Token
regexp_declarations (bool def
)
2117 BRING (X
) BRING (a
);
2118 Token f
= catREsym ("_match");
2120 STATIC
, EXTERN
, INLINE
,
2121 R (int), f
, '(', R (const), R (char), '*', a
, ',', R (struct),
2122 R (charp_len
), X
, '[', ']', ')', -1
2124 Token recipe
= catREsym ("_recipe");
2125 Token xargs
[] = { a
, X
, -1 };
2126 Token
*dflt1
= mallocint (2);
2129 Token
**dflt
= (Token
**) malloc (2 * sizeof *dflt
);
2132 xdeclare_function (&Global
, f
, f
, typeID_matchf
, fproto
, xargs
, 0, dflt
, 0);
2133 enter_global_object (recipe
, typeID_charP
);
2135 regexp_dclql
.iextern
= !regexp_dclql
.istatic
;
2136 outprintf (GVARS
, EXTERN
, STATIC
, R (const), R (char), recipe
, '[', ']', ';', -1);
2141 static Token
regexp_compiler (char *re
)
2143 unsigned int sctbl
[258];
2145 subregular
*ssr_pool
[512];
2149 R_ctbl
= catRE ("_ctbl");
2151 sprintf (tmp
, "/* REGEXP: %s */\n", re
);
2152 outprintf (REGEXPD
, new_symbol (strdup (tmp
)), -1);
2155 if (debugflag
.REGEXP_DEBUG
)
2156 PRINTF ("Regexp: %s\n", re
);
2158 prepare_regexp (re
);
2162 make_match_function (re
);
2163 return regexp_declarations (1);
2166 static NormPtr
parse_regexp_dcl (NormPtr p
)
2168 if (!issymbol (R_name
= CODE
[p
++]))
2169 parse_error (p
, "No name for RegExp");
2170 if (CODE
[p
] != '(') {
2171 regexp_declarations (0);
2175 if (CODE
[p
++] != '(')
2176 parse_error (p
, "Syntax error in RegExp");
2177 if (!ISVALUE (CODE
[p
]) || type_of_const (CODE
[p
]) != typeID_charP
)
2178 parse_error (p
, "No regular expression string!");
2179 char *re
= expand (CODE
[p
++]);
2180 char *req
= (char*) alloca (strlen (re
));
2181 strcpy (req
, re
+ 1);
2182 req
[strlen (req
) - 1] = 0;
2184 regexp_strfuncs
= regexp_case
= 1;
2185 regexp_anon
= regexp_packed
= regexp_noextract
= regexp_noctbl
= 0;
2186 if (CODE
[p
] == ',')
2187 for (++p
; ISSYMBOL (CODE
[p
]); p
++)
2188 if (!tokcmp (CODE
[p
], "NOCASE")) regexp_case
= 0;
2189 else if (!tokcmp (CODE
[p
], "PACKED")) regexp_packed
= 1;
2190 else if (!tokcmp (CODE
[p
], "NOEX")) regexp_noextract
= 1;
2191 else if (!tokcmp (CODE
[p
], "NOCTBL")) regexp_noctbl
= 1;
2192 else if (!tokcmp (CODE
[p
], "NOSTRFUNC")) regexp_strfuncs
= 0;
2193 else parse_error (p
, "Unrecognized regexp option");
2194 if (CODE
[p
++] != ')')
2195 parse_error (p
, "RegExp definition");
2197 REGEXP
= new_stream ();
2198 REGEXPD
= new_stream ();
2199 regexp_compiler (req
);
2200 concate_streams (concate_streams (REGEXP_CODE
, REGEXPD
), REGEXP
);
2205 static NormPtr
parse_regexp_uclass (NormPtr p
)
2207 if (CODE
[++p
] != '(') goto aerr
;
2208 if (!isvalue (CODE
[++p
])) goto aerr
;
2209 char *cc
= expand (CODE
[p
++]);
2210 if (CODE
[p
++] != ',') goto aerr
;
2211 if (!ISVALUE (CODE
[p
])) goto aerr
;
2212 char *cd
= expand (CODE
[p
++]), ce
[64];
2213 if (CODE
[p
++] != ')') goto aerr
;
2214 if (cc
[0] != '\'' || !islower (cc
[1])
2215 || cd
[0] != '"' || cd
[1] != '[') goto aerr
;
2216 strcpy (ce
, cd
+ 1);
2217 ce
[strlen (ce
) - 1] = 0;
2218 /* where are regexps when you need them? */
2219 if (cc
[1] == 'r' || cc
[1] == 'a' || cc
[1] == 't'
2220 || cc
[1] == 'n' || cc
[1] == 'f' || cc
[1] == 'x')
2221 parse_error (p
, "Abbreviation of reserved character");
2222 uclass
[OFFS (cc
[1])].def
= strdup (ce
);
2225 parse_error (p
, "abbrev syntax error: abbrev ('lowercase char', \"[string]\") ");
2229 /******************************
2230 Perl style =~ in expressions
2231 ******************************/
2232 Token
perlop_regexp (Token recipe
)
2234 regexp_dclql
.istatic
= 1;
2235 regexp_dclql
.iextern
= 0;
2236 regexp_dclql
.iinline
= 1;
2238 regexp_strfuncs
= regexp_anon
= regexp_case
= regexp_packed
= regexp_noextract
= 1;
2239 char *re
= expand (recipe
);
2240 char *req
= (char*) alloca (strlen (re
));
2241 strcpy (req
, re
+ 1);
2242 req
[strlen (req
) - 1] = 0;
2243 REGEXP
= new_stream ();
2244 REGEXPD
= new_stream ();
2245 R_name
= name_anon_regexp ();
2246 recipe
= regexp_compiler (req
);
2247 concate_streams (concate_streams (REGEXP_CODE
, REGEXPD
), REGEXP
);
2251 /******************************
2252 Parse a RegExp, declaration
2253 ******************************/
2254 NormPtr
parse_RegExp (NormPtr p
, int ql
)
2256 regexp_dclql
.istatic
= ql
& 1;
2257 regexp_dclql
.iextern
= ql
& 2;
2258 regexp_dclql
.iinline
= ql
& 4;
2260 p
= CODE
[p
] == RESERVED_abbrev
? parse_regexp_uclass (p
) : parse_regexp_dcl (p
);
2261 if (CODE
[p
] != ',' && CODE
[p
] != ';')
2262 parse_error (p
, "RegExp declaration separator");
2263 } while (CODE
[p
++] != ';');