2 * Copyright 2012 Yichun "agentzh" Zhang
3 * Copyright 2007-2009 Russ Cox. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style
6 * Part of this code is from the NGINX opensource project: http://nginx.org/LICENSE
8 * This library is licensed under the BSD license.
10 * Copyright (c) 2012-2014 Yichun "agentzh" Zhang.
12 * Copyright (c) 2007-2009 Russ Cox, Google Inc. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
18 * * Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * * Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following disclaimer
22 * in the documentation and/or other materials provided with the
24 * * Neither the name of Google, Inc. nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 module iv
.srex
/*is aliced*/;
44 // ///////////////////////////////////////////////////////////////////////// //
46 public enum SRes
: int {
47 Ok
= 0, /// Match found
48 Again
= -1, /// EOF buffer wasn't seen, and it is still possible to find a match
49 NoMatch
= -2, /// No match found, stop searching
50 Error
= -3, /// Some internal error occured (usually out of memory)
51 Done
= -4, /// Internal code, user should never see this
55 public enum SRFlags
: uint {
56 CaseInsensitive
= 1<<0, ///
57 Multiline
= 1<<1, /// \s range will not include '\n', and dot will not match '\n' too
61 public struct RegExp
{
63 MemPool pool
; // first member is RegExpPart
65 Program
* reprg
; // it always points to pool
72 @property bool valid () const pure nothrow @safe @nogc { pragma(inline
, true); return (count
> 0); }
74 static RegExp
create(R
) (auto ref R rng
, uint flags
=0) if (isInputRange
!R
&& is(ElementEncodingType
!R
: char)) {
76 res
.compile(rng
, flags
);
80 bool compile (R
) (auto ref R rng
, uint flags
=0) if (isInputRange
!R
&& is(ElementEncodingType
!R
: char)) {
81 if (!pool
.active
) pool
= MemPool
.create
;
84 auto re
= parseRegExp(rng
, pool
, flags
, lastError
, lastErrorPos
);
85 if (re
is null) { pool
= pool
.init
; return false; } // free pool and exit
86 auto prgpool
= MemPool
.create
;
87 if (!prgpool
.active
) { pool
= pool
.init
; lastError
= "compile error"; lastErrorPos
= 0; return false; } // free pool and exit
88 auto prg
= reCompile(prgpool
, re
);
89 if (prg
is null) { pool
= pool
.init
; lastError
= "compile error"; lastErrorPos
= 0; return false; } // free pool and exit
92 capcount
= re
.multi_ncaps
[0];
97 lastError
= "memory error";
100 count
= capcount
= 0;
104 @property int reCount () const @safe { pragma(inline
, true); return count
; }
105 // `captureCount` returns total number, including capture $0 for whole regex
106 @property int captureCount () const @trusted { pragma(inline
, true); return capcount
+1; }
110 // ///////////////////////////////////////////////////////////////////////// //
111 // optimize this: allocate on demand
112 Exception reParseError
;
113 static this () { reParseError
= new Exception(""); }
116 bool isWordChar (char c
) pure nothrow @safe @nogc {
117 pragma(inline
, true);
119 (c
>= '0' && c
<= '9') ||
120 (c
>= 'A' && c
<= 'Z') ||
121 (c
>= 'a' && c
<= 'z') ||
126 // ////////////////////////////////////////////////////////////////////////// //
129 static immutable char[2] esc_d_ranges
= [ '0', '9' ];
130 static immutable char[4] esc_D_ranges
= [ 0, '0'-1, '9'+1, 255 ];
131 static immutable char[8] esc_w_ranges
= [ 'A', 'Z', 'a', 'z', '0', '9', '_', '_' ];
132 static immutable char[10] esc_W_ranges
= [ 0, 47, 58, 64, 91, 94, 96, 96, 123, 255 ];
133 static immutable char[10] esc_s_ranges
= [ ' ', ' ', '\f', '\f', '\n', '\n', '\r', '\r', '\t', '\t' ];
134 static immutable char[10] esc_sm_ranges
= [ ' ', ' ', '\f', '\f', '\r', '\r', '\t', '\t' ];
135 static immutable char[8] esc_S_ranges
= [ 0, 8, 11, 11, 14, 31, 33, 255 ];
136 static immutable char[8] esc_SM_ranges
= [ 0, 8, 10, 11, 14, 31, 33, 255 ];
137 static immutable char[6] esc_h_ranges
= [ 0x09, 0x09, 0x20, 0x20, 0xa0, 0xa0 ];
138 static immutable char[8] esc_H_ranges
= [ 0x00, 0x08, 0x0a, 0x1f, 0x21, 0x9f, 0xa1, 0xff ];
139 static immutable char[10] esc_v_ranges
= [ 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x85, 0x85 ];
140 static immutable char[6] esc_V_ranges
= [ 0x00, 0x09, 0x0e, 0x84, 0x86, 0xff ];
141 static immutable char[4] esc_N_ranges
= [ 0, '\n'-1, '\n'+1, 255 ];
143 static bool isdigit (int ch
) pure nothrow @safe @nogc { return (ch
>= '0' && ch
<= '9'); }
144 static bool isalpha (int ch
) pure nothrow @safe @nogc { return (ch
>= 'A' && ch
<= 'Z') ||
(ch
>= 'a' && ch
<= 'z'); }
145 static bool isalnum (int ch
) pure nothrow @safe @nogc { return (ch
>= '0' && ch
<= '9') ||
(ch
>= 'A' && ch
<= 'Z') ||
(ch
>= 'a' && ch
<= 'z'); }
146 static int digitInBase (char ch
, int base
=10) pure nothrow @trusted @nogc {
147 pragma(inline
, true);
149 base
>= 1 && ch
>= '0' && ch
< '0'+base ? ch
-'0' :
150 base
> 10 && ch
>= 'A' && ch
< 'A'+base
-10 ? ch
-'A'+10 :
151 base
> 10 && ch
>= 'a' && ch
< 'a'+base
-10 ? ch
-'a'+10 :
161 int delegate () nextch
; // -1 on EOL
164 int errpos
= -1; // valid only if error was thrown
165 alias curpos
= errpos
;
167 this (scope int delegate () anextch
) {
175 if (yytok
< 0) yytok
= -1;
179 void fail (string msg
, int ep
=-1) { errmsg
= msg
; if (ep
>= 0) errpos
= ep
; throw reParseError
; }
181 T
* memoryfail(T
) (T
* p
) {
182 if (p
is null) { errmsg
= "out of memory"; throw reParseError
; }
186 RegExpPart
* buildRange (const(char)[] rng
, bool normal
) {
187 auto yyl
= memoryfail(pool
.reCreate((normal ? RegExpPart
.Type
.Class
: RegExpPart
.Type
.NClass
), null, null));
189 foreach (immutable idx
; 0..rng
.length
/2) {
190 auto range
= memoryfail(pool
.alloc
!RERange
);
191 range
.from
= rng
[idx
*2];
192 range
.to
= rng
[idx
*2+1];
193 if (last
is null) yyl
.range
= range
; else last
.next
= range
;
199 RegExpPart
* metaAssert (RegExpPart
.AssType att
) {
200 auto yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Assert
, null, null));
205 RegExpPart
* metaLit (char ch
) {
206 auto yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Lit
, null, null));
211 RegExpPart
* parseAtom () {
212 if (yytok
== '{' || yytok
== '}' || yytok
== '|') fail("invalid regexp special char");
220 if (yytok
!= ':') fail("':' expected in anonymous group");
225 auto yyl
= parseAlt();
226 if (yytok
!= ')') fail("unclosed group", stp
);
229 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Paren
, yyl
, null));
236 auto yyl
= memoryfail(pool
.reCreate((flags
&SRFlags
.Multiline ? RegExpPart
.Type
.DotML
: RegExpPart
.Type
.Dot
), null, null));
242 auto yyl
= metaAssert(RegExpPart
.AssType
.Bol
);
248 auto yyl
= metaAssert(RegExpPart
.AssType
.Eol
);
253 if ((flags
&SRFlags
.CaseInsensitive
) && isalpha(yytok
)) {
254 auto yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Class
, null, null));
255 yyl
.range
= memoryfail(pool
.alloc
!RERange
);
256 yyl
.range
.from
= cast(char)yytok
;
257 yyl
.range
.to
= cast(char)yytok
;
258 yyl
.range
.next
= memoryfail(pool
.alloc
!RERange
);
261 yyl
.range
.next
.from
= cast(char)(yytok
+32);
262 yyl
.range
.next
.to
= cast(char)(yytok
+32);
265 yyl
.range
.next
.from
= cast(char)(yytok
-32);
266 yyl
.range
.next
.to
= cast(char)(yytok
-32);
268 yyl
.range
.next
.next
= null;
280 case EOF
: fail("metachar expected"); break;
281 case 'B': yyl
= metaAssert(RegExpPart
.AssType
.NonWord
); break;
282 case 'b': yyl
= metaAssert(RegExpPart
.AssType
.Word
); break;
283 case 'z': yyl
= metaAssert(RegExpPart
.AssType
.StreamEnd
); break;
284 case 'A': yyl
= metaAssert(RegExpPart
.AssType
.StreamStart
); break;
285 case 'd': yyl
= buildRange(esc_d_ranges
, true); break;
286 case 'D': yyl
= buildRange(esc_d_ranges
, false); break;
287 case 'w': yyl
= buildRange(esc_w_ranges
, true); break;
288 case 'W': yyl
= buildRange(esc_w_ranges
, false); break;
289 case 's': yyl
= buildRange((flags
&SRFlags
.Multiline ? esc_sm_ranges
: esc_s_ranges
), true); break;
290 case 'S': yyl
= buildRange(esc_s_ranges
, false); break;
291 case 'N': // \N is defined as [^\n]
292 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.NClass
, null, null));
293 auto range
= memoryfail(pool
.alloc
!RERange
);
299 case 'C': yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Dot
, null, null)); break; // \C is defined as .
300 case 'h': yyl
= buildRange(esc_h_ranges
, true); break;
301 case 'H': yyl
= buildRange(esc_h_ranges
, false); break;
302 case 'v': yyl
= buildRange(esc_v_ranges
, true); break;
303 case 'V': yyl
= buildRange(esc_v_ranges
, false); break;
304 case 't': yyl
= metaLit('\t'); break;
305 case 'n': yyl
= metaLit('\n'); break;
306 case 'r': yyl
= metaLit('\r'); break;
307 case 'f': yyl
= metaLit('\f'); break;
308 case 'a': yyl
= metaLit('\a'); break;
309 case 'e': yyl
= metaLit('\x1b'); break;
312 if (yytok
< 0 || yytok
> 255) fail("invalid meta");
313 int n
= digitInBase(cast(char)yytok
, 16);
314 if (n
< 0) fail("invalid meta");
316 if (yytok
>= 0 && yytok
<= 255 && digitInBase(cast(char)yytok
, 16) >= 0) {
317 n
= n
*16+digitInBase(cast(char)yytok
, 16);
320 yyl
= metaLit(cast(char)n
);
322 default: // other non-char escapes are literal
323 if (isalnum(mt
)) fail("invalid meta");
324 yyl
= metaLit(cast(char)mt
);
327 if (yyl
is null) fail("wtf?!");
328 if (flags
&SRFlags
.CaseInsensitive
) {
329 if (yyl
.type
== RegExpPart
.Type
.Class || yyl
.type
== RegExpPart
.Type
.NClass
) {
330 yyl
.range
= memoryfail(range2CI(pool
, yyl
.range
));
340 if (yytok
== EOF
) fail("invalid range", stp
);
341 auto type
= RegExpPart
.Type
.Class
;
342 if (yytok
== '^') { type
= RegExpPart
.Type
.NClass
; nextToken(); }
343 if (yytok
== ']') fail("empty ranges are not supported", stp
);
344 bool[256] chars
= false;
345 while (yytok
!= ']') {
348 bool isrange
= false;
349 if (yytok
== EOF
) fail("invalid range", stp
);
352 if (yytok
== EOF
) fail("invalid range", stp
);
354 case 't': litch
= '\t'; islit
= true; break;
355 case 'n': litch
= '\n'; islit
= true; break;
356 case 'r': litch
= '\r'; islit
= true; break;
357 case 'f': litch
= '\f'; islit
= true; break;
358 case 'a': litch
= '\a'; islit
= true; break;
359 case 'e': litch
= '\x1b'; islit
= true; break;
375 if (yytok
< 0 || yytok
> 255) fail("invalid meta");
376 int n
= digitInBase(cast(char)yytok
, 16);
377 if (n
< 0) fail("invalid meta");
379 if (yytok
>= 0 && yytok
<= 255 && digitInBase(cast(char)yytok
, 16) >= 0) {
380 n
= n
*16+digitInBase(cast(char)yytok
, 16);
387 if (isalnum(yytok
)) fail("invalid escape");
388 litch
= cast(char)yytok
;
394 litch
= cast(char)yytok
;
399 case 'd': rng
= esc_d_ranges
; break;
400 case 'D': rng
= esc_D_ranges
; break;
401 case 's': rng
= (flags
&SRFlags
.Multiline ? esc_sm_ranges
: esc_s_ranges
); break;
402 case 'S': rng
= esc_S_ranges
; break;
403 case 'N': rng
= esc_N_ranges
; break;
404 case 'w': rng
= esc_w_ranges
; break;
405 case 'W': rng
= esc_W_ranges
; break;
406 case 'h': rng
= esc_h_ranges
; break;
407 case 'H': rng
= esc_H_ranges
; break;
408 case 'v': rng
= esc_v_ranges
; break;
409 case 'V': rng
= esc_V_ranges
; break;
412 nextToken(); // skip range type
413 foreach (immutable idx
; 0..rng
.length
/2) {
414 chars
[rng
[idx
*2+0]..rng
[idx
*2+1]+1] = true;
416 if (yytok
== '-') fail("no metaranges, please");
419 nextToken(); // skip literal
424 nextToken(); // skip minus
425 if (yytok
== EOF
) fail("invalid range", stp
);
430 if (yytok
== EOF
) fail("invalid range", stp
);
432 case 't': ech
= '\t'; lxc
= true; break;
433 case 'n': ech
= '\n'; lxc
= true; break;
434 case 'r': ech
= '\r'; lxc
= true; break;
435 case 'f': ech
= '\f'; lxc
= true; break;
436 case 'a': ech
= '\a'; lxc
= true; break;
437 case 'e': ech
= '\x1b'; lxc
= true; break;
452 if (yytok
< 0 || yytok
> 255) fail("invalid meta");
453 int n
= digitInBase(cast(char)yytok
, 16);
454 if (n
< 0) fail("invalid meta");
456 if (yytok
>= 0 && yytok
<= 255 && digitInBase(cast(char)yytok
, 16) >= 0) {
457 n
= n
*16+digitInBase(cast(char)yytok
, 16);
464 if (isalnum(yytok
)) fail("invalid escape");
465 ech
= cast(char)yytok
;
470 ech
= cast(char)yytok
;
473 if (!lxc
) fail("invalid range");
474 if (ech
< litch
) fail("invalid range");
475 nextToken(); // skip literal
476 chars
[litch
..ech
+1] = true;
478 if (yytok
!= ']') fail("invalid range", stp
);
480 auto yyl
= memoryfail(pool
.reCreate(type
, null, null));
483 while (idx
< chars
.length
) {
484 if (!chars
[idx
]) { ++idx
; continue; }
486 while (ei
< chars
.length
&& chars
[ei
]) ++ei
;
487 auto range
= memoryfail(pool
.alloc
!RERange
);
488 range
.from
= cast(char)idx
;
489 range
.to
= cast(char)(ei
-1);
490 if (last
is null) yyl
.range
= range
; else last
.next
= range
;
494 if (last
is null) fail("invalid range", stp
);
499 auto yyl
= metaLit(cast(char)yytok
);
504 RegExpPart
* parseRepeat () {
505 auto yyl
= parseAtom();
508 // counted repetition
511 while (yytok
>= '0' && yytok
<= '9') {
512 res
= res
*10+yytok
-'0';
521 qcc
.from
= parseInt();
524 } else if (yytok
!= ',') {
525 fail("comma expected");
527 nextToken(); // skip comma
528 qcc
.to
= (yytok
== '}' ?
-1 : parseInt());
530 if (yytok
!= '}') fail("'}' expected");
531 //nextToken(); // later
532 if (qcc
.from
>= 500 || qcc
.to
>= 500) fail("repetition count too big");
533 if (qcc
.to
>= 0 && qcc
.from
> qcc
.to
) fail("invalid repetition count");
535 if (qcc
.to
== 1) { yytok
= '?'; goto cont
; }
536 if (qcc
.to
== -1) { yytok
= '*'; goto cont
; }
537 } else if (qcc
.from
== 1) {
538 if (qcc
.to
== -1) { yytok
= '+'; goto cont
; }
540 nextToken(); // skip closing curly
542 if (yytok
== '?') { greedy
= false; nextToken(); }
543 return memoryfail(lowerCountedRep(pool
, yyl
, qcc
, greedy
));
550 if (yytok
== '?') { greedy
= false; nextToken(); }
551 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Star
, yyl
, null));
558 if (yytok
== '?') { greedy
= false; nextToken(); }
559 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Plus
, yyl
, null));
566 if (yytok
== '?') { greedy
= false; nextToken(); }
567 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Quest
, yyl
, null));
574 RegExpPart
* parseConcat () {
578 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Nil
, null, null));
581 while (yytok
!= '|' && yytok
!= ')' && yytok
!= EOF
) {
582 auto y2
= parseRepeat();
583 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Cat
, yyl
, y2
));
589 RegExpPart
* parseAlt () {
590 RegExpPart
* yyl
= parseConcat();
591 while (yytok
== '|') {
593 auto y2
= parseConcat();
594 yyl
= memoryfail(pool
.reCreate(RegExpPart
.Type
.Alt
, yyl
, y2
));
599 RegExpPart
* parseMain () {
602 auto yyl
= parseAlt();
603 if (yytok
!= EOF
) fail("extra data at regexp");
609 RegExpPart
* parseRegExp(R
) (auto ref R rng
, ref MemPool pool
, uint flags
, out string lasterr
, out int lasterrpos
)
610 if (isInputRange
!R
&& is(ElementEncodingType
!R
: char))
612 if (!pool
.active
) return null;
631 // ok, build "wrapper" regex: ".*?(regex)"
633 p
.errmsg
= "memory error";
634 re
= pool
.reCreate(RegExpPart
.Type
.Paren
, re
, null); // $0 capture
635 if (re
is null) throw reParseError
;
637 re
= pool
.reCreate(RegExpPart
.Type
.TopLevel
, re
, null);
638 if (re
is null) throw reParseError
;
640 auto r
= pool
.reCreate(RegExpPart
.Type
.Dot
, null, null);
641 if (r
is null) throw reParseError
;
643 r
= pool
.reCreate(RegExpPart
.Type
.Star
, r
, null);
644 if (r
is null) throw reParseError
;
646 re
= pool
.reCreate(RegExpPart
.Type
.Cat
, r
, re
);
647 if (re
is null) throw reParseError
;
648 } catch (Exception
) {
650 lasterrpos
= p
.errpos
;
655 re
.multi_ncaps
= pool
.alloc
!uint;
656 if (re
.multi_ncaps
is null) return null;
658 re
.multi_ncaps
[0] = p
.ncaps
;
664 // ///////////////////////////////////////////////////////////////////////// //
667 enum PoolSize
= 4096;
680 uint poolcount
, poolsize
;
682 int newPool (uint size
) {
683 import core
.stdc
.stdlib
: malloc
, realloc
;
685 debug(srex_pools
) { import core
.stdc
.stdio
: stderr
, fprintf
; stderr
.fprintf("allocating new pool of size %u\n", size
); }
686 if (poolcount
>= poolsize
) {
687 int nsz
= poolsize
+512;
688 auto np
= cast(Pool
*)realloc(pools
, nsz
*pools
[0].sizeof
);
689 if (np
is null) return -1;
690 //np[poolsize..nsz] = null;
694 auto pm
= cast(ubyte*)malloc(size
);
695 if (pm
is null) return -1;
696 pools
[poolcount
].data
= pm
;
697 pools
[poolcount
].used
= 0;
698 pools
[poolcount
].size
= size
;
702 ubyte* xalloc (uint size
) {
703 import core
.stdc
.stdlib
: malloc
;
704 import core
.stdc
.string
: memset
;
706 //debug(srex_pools) { import core.stdc.stdio : stderr, fprintf; stderr.fprintf("allocating buffer of size %u\n", size); }
708 if (size
>= PoolSize
) {
709 // unconditional new pool
710 auto pn
= newPool(size
);
711 if (pn
< 0) return null;
712 pools
[pn
].used
= size
;
713 res
= pools
[pn
].data
;
715 // do we have free memory in last pool?
716 auto lp
= pools
+poolcount
-1;
717 if (poolcount
== 0 || lp
.size
-lp
.used
< size
) {
719 //if (poolcount != 0) { import core.stdc.stdio : stderr, fprintf; stderr.fprintf(" want new pool: used=%u; size=%u\n", lp.used, lp.size); }
720 auto pn
= newPool(PoolSize
);
721 if (pn
< 0) return null;
723 //{ import core.stdc.stdio : stderr, fprintf; stderr.fprintf(" pools: %u\n", poolcount); }
725 res
= lp
.data
+lp
.used
;
728 //{ import core.stdc.stdio : stderr, fprintf; stderr.fprintf(" pool: used=%u; size=%u\n", lp.used, lp.size); }
729 memset(res
, 0, size
);
735 import core
.stdc
.stdlib
: free
;
736 debug(srex_pools
) { import core
.stdc
.stdio
: stderr
, fprintf
; stderr
.fprintf("clearing pools: %u\n", poolcount
); }
737 foreach_reverse (ref p
; pools
[0..poolcount
]) free(p
.data
);
738 if (pools
!is null) free(pools
);
740 poolcount
= poolsize
= 0;
743 T
* alloc(T
) (uint addsize
=0) {
744 static if ((void*).sizeof
> 4) { assert(T
.sizeof
< int.max
/8); }
745 import core
.stdc
.string
: memcpy
;
746 T
* res
= cast(T
*)xalloc(cast(uint)T
.sizeof
+addsize
);
749 static if (is(T == struct) && T.sizeof > 0) {
750 static immutable T i = T.init;
751 memcpy(res, &i, T.sizeof);
760 // ///////////////////////////////////////////////////////////////////////// //
761 public struct MemPool
{
763 static if ((void*).sizeof
<= 4) {
770 @property inout(PoolImpl
)* impl () inout { pragma(inline
, true); return cast(typeof(return))pi
; }
775 auto pp
= cast(PoolImpl
*)pi
;
777 import core
.stdc
.stdlib
: free
;
780 debug(srex_pools_high
) { import core
.stdc
.stdio
: stderr
, fprintf
; stderr
.fprintf("MEMHI: pool 0x%08x freed\n", pi
); }
786 @property inout(void)* firstalloc () inout @trusted {
788 auto pp
= cast(PoolImpl
*)pi
;
789 return cast(typeof(return))(pp
.poolcount ? pp
.pools
[0].data
: null);
796 this (in MemPool p
) {
797 pragma(inline
, true);
799 if (pi
) ++(cast(PoolImpl
*)pi
).rc
;
802 this (this) @trusted { static if (__VERSION__
> 2071) pragma(inline
, true); if (pi
) ++(cast(PoolImpl
*)pi
).rc
; }
804 ~this () { pragma(inline
, true); if (pi
) decref(); }
806 @property bool active () const pure @safe { pragma(inline
, true); return (pi
!= 0); }
808 static MemPool
create () {
809 import core
.stdc
.stdlib
: malloc
;
810 import core
.stdc
.string
: memset
;
812 auto pp
= cast(PoolImpl
*)malloc(PoolImpl
.sizeof
);
815 memset(pp
, 0, (*pp
).sizeof
);
817 res
.pi
= cast(typeof(res
.pi
))pp
;
818 debug(srex_pools_high
) { import core
.stdc
.stdio
: stderr
, fprintf
; stderr
.fprintf("MEMHI: pool 0x%08x allocated\n", res
.pi
); }
823 void clear () { pragma(inline
, true); if (pi
) (cast(PoolImpl
*)pi
).clear
; }
825 void release () { pragma(inline
, true); if (pi
) decref(); }
827 void opAssign (in MemPool p
) {
828 if (p
.pi
) ++(cast(PoolImpl
*)(p
.pi
)).rc
;
833 T
* alloc(T
) (uint addsize
=0) {
834 if (!pi
) return null;
835 return (cast(PoolImpl
*)pi
).alloc
!T(addsize
);
840 // ///////////////////////////////////////////////////////////////////////// //
842 uint rc
; // reference count
849 enum Opcode
: ubyte {
862 align(1) struct Range2VM
{
881 uint group
; // capture group
898 uint uniq_threads
; // unique thread count
899 uint dup_threads
; // duplicatable thread count
900 uint lookahead_asserts
;
902 Chain
* leading_bytes
;
911 // ///////////////////////////////////////////////////////////////////////// //
912 void dump (Program
* prog
) {
913 VMInstr
* pc
, start
, end
;
915 end
= prog
.start
+prog
.len
;
916 for (pc
= start
; pc
< end
; ++pc
) {
917 import core
.stdc
.stdio
: printf
;
924 void dump (VMInstr
* pc
, VMInstr
* start
) {
925 import core
.stdc
.stdio
: FILE
, fprintf
, stdout
, fputc
;
933 fprintf(f
, "%2d. split %d, %d", cast(int)(pc
-start
), cast(int)(pc
.x
-start
), cast(int)(pc
.y
-start
));
936 fprintf(f
, "%2d. jmp %d", cast(int)(pc
-start
), cast(int)(pc
.x
-start
));
939 fprintf(f
, "%2d. char %d", cast(int)(pc
-start
), cast(int)pc
.ch
);
942 fprintf(f
, "%2d. in", cast(int)(pc
-start
));
943 for (i
= 0; i
< pc
.ranges
.count
; i
++) {
944 range
= pc
.ranges
.head
+i
;
945 if (i
> 0) fputc(',', f
);
946 fprintf(f
, " %d-%d", range
.from
, range
.to
);
950 fprintf(f
, "%2d. notin", cast(int)(pc
-start
));
951 for (i
= 0; i
< pc
.ranges
.count
; i
++) {
952 range
= pc
.ranges
.head
+i
;
953 if (i
> 0) fputc(',', f
);
954 fprintf(f
, " %d-%d", range
.from
, range
.to
);
958 fprintf(f
, "%2d. any", cast(int)(pc
-start
));
961 fprintf(f
, "%2d. anyml", cast(int)(pc
-start
));
964 fprintf(f
, "%2d. match %d", cast(int)(pc
-start
), cast(int)pc
.regex_id
);
967 fprintf(f
, "%2d. save %d", cast(int)(pc
-start
), cast(int)pc
.group
);
970 fprintf(f
, "%2d. assert ", cast(int)(pc
-start
));
971 switch (pc
.assertion
) {
972 case RegExpPart
.AssType
.StreamStart
:
975 case RegExpPart
.AssType
.Bol
:
978 case RegExpPart
.AssType
.StreamEnd
:
981 case RegExpPart
.AssType
.NonWord
:
984 case RegExpPart
.AssType
.Word
:
987 case RegExpPart
.AssType
.Eol
:
996 fprintf(f
, "%2d. unknown", cast(int)(pc
-start
));
1002 // ////////////////////////////////////////////////////////////////////////// //
1012 // counted quantifier
1013 struct RECountedQuant
{
1036 enum AssType
: ubyte {
1064 // ////////////////////////////////////////////////////////////////////////// //
1065 void dump (const(RegExpPart
)* r
) {
1066 import core
.stdc
.stdio
: printf
;
1067 const(RERange
)* range
;
1069 case RegExpPart
.Type
.Alt
:
1076 case RegExpPart
.Type
.Cat
:
1083 case RegExpPart
.Type
.Lit
:
1084 printf("Lit(%d)", cast(int)r
.ch
);
1086 case RegExpPart
.Type
.Dot
:
1089 case RegExpPart
.Type
.DotML
:
1092 case RegExpPart
.Type
.Paren
:
1093 printf("Paren(%lu, ", cast(uint)r
.group
);
1097 case RegExpPart
.Type
.Star
:
1098 if (!r
.greedy
) printf("Ng");
1103 case RegExpPart
.Type
.Plus
:
1104 if (!r
.greedy
) printf("Ng");
1109 case RegExpPart
.Type
.Quest
:
1110 if (!r
.greedy
) printf("Ng");
1115 case RegExpPart
.Type
.Nil
:
1118 case RegExpPart
.Type
.Class
:
1120 for (range
= r
.range
; range
; range
= range
.next
) printf("[%d, %d]", range
.from
, range
.to
);
1123 case RegExpPart
.Type
.NClass
:
1125 for (range
= r
.range
; range
; range
= range
.next
) printf("[%d, %d]", range
.from
, range
.to
);
1128 case RegExpPart
.Type
.Assert
:
1130 switch (r
.assertion
) {
1131 case RegExpPart
.AssType
.StreamStart
:
1134 case RegExpPart
.AssType
.Bol
:
1137 case RegExpPart
.AssType
.Eol
:
1140 case RegExpPart
.AssType
.StreamEnd
:
1143 case RegExpPart
.AssType
.NonWord
:
1146 case RegExpPart
.AssType
.Word
:
1155 case RegExpPart
.Type
.TopLevel
:
1156 printf("TOPLEVEL(%lu, ", cast(uint)r
.regex_id
);
1167 // ////////////////////////////////////////////////////////////////////////// //
1168 private RegExpPart
* reCreate (MemPool pool
, RegExpPart
.Type type
, RegExpPart
* left
, RegExpPart
* right
) {
1169 auto r
= pool
.alloc
!RegExpPart
;
1170 if (r
is null) return null;
1178 // turn range into case-insensitive one
1179 private RERange
* range2CI (MemPool pool
, RERange
* range
) {
1180 static T
max(T
) (T a
, T b
) pure nothrow @safe @nogc { pragma(inline
, true); return (a
> b ? a
: b
); }
1181 static T
min(T
) (T a
, T b
) pure nothrow @safe @nogc { pragma(inline
, true); return (a
< b ? a
: b
); }
1183 char from
= 0, to
= 0;
1185 for (r
= range
; r
; r
= r
.next
) {
1188 if (to
>= 'A' && from
<= 'Z') {
1190 nr
= pool
.alloc
!RERange
;
1191 if (nr
is null) return null;
1192 nr
.from
= cast(char)(max(from
, 'A')+32);
1193 nr
.to
= cast(char)(min(to
, 'Z')+32);
1198 if (to
>= 'a' && from
<= 'z') {
1199 /* overlap with a-z */
1200 nr
= pool
.alloc
!RERange
;
1201 if (nr
is null) return null;
1202 nr
.from
= cast(char)(max(from
, 'a')-32);
1203 nr
.to
= cast(char)(min(to
, 'z')-32);
1213 private RegExpPart
* lowerCountedRep (MemPool pool
, RegExpPart
* subj
, ref RECountedQuant cquant
, bool greedy
) {
1215 RegExpPart
* concat
, quest
, star
;
1217 if (cquant
.from
== 1 && cquant
.to
== 1) return subj
;
1219 // generate subj{from} first
1220 if (cquant
.from
== 0) {
1221 concat
= pool
.reCreate(RegExpPart
.Type
.Nil
, null, null);
1222 if (concat
is null) return null;
1226 for (i
= 1; i
< cquant
.from
; ++i
) {
1227 concat
= pool
.reCreate(RegExpPart
.Type
.Cat
, concat
, subj
);
1228 if (concat
is null) return null;
1232 if (cquant
.from
== cquant
.to
) return concat
;
1234 if (cquant
.to
== -1) {
1235 // append subj* to concat
1236 star
= pool
.reCreate(RegExpPart
.Type
.Star
, subj
, null);
1237 if (star
is null) return null;
1238 star
.greedy
= greedy
;
1239 concat
= pool
.reCreate(RegExpPart
.Type
.Cat
, concat
, star
);
1240 if (concat
is null) return null;
1244 // append (?:subj?){to-from}
1245 quest
= pool
.reCreate(RegExpPart
.Type
.Quest
, subj
, null);
1246 if (quest
is null) return null;
1247 quest
.greedy
= greedy
;
1248 for (; i
< cquant
.to
; ++i
) {
1249 concat
= pool
.reCreate(RegExpPart
.Type
.Cat
, concat
, quest
);
1250 if (concat
is null) return null;
1256 // ////////////////////////////////////////////////////////////////////////// //
1257 Program
* reCompile (MemPool pool
, RegExpPart
* re
) {
1258 import core
.stdc
.string
: memcpy
, memset
;
1260 uint i
, n
, multi_ncaps_size
;
1267 multi_ncaps_size
= (re
.nregexes
-1)*cast(int)uint.sizeof
;
1269 p
= pool
.alloc
!char(cast(uint)Program
.sizeof
+multi_ncaps_size
+n
*cast(uint)VMInstr
.sizeof
);
1270 if (p
is null) return null;
1272 prog
= cast(Program
*)p
;
1274 prog
.nregexes
= re
.nregexes
;
1276 memcpy(prog
.multi_ncaps
.ptr
, re
.multi_ncaps
, re
.nregexes
*uint.sizeof
);
1278 prog
.start
= cast(VMInstr
*)(p
+Program
.sizeof
+multi_ncaps_size
);
1280 memset(prog
.start
, 0, n
*VMInstr
.sizeof
);
1282 pc
= emit(pool
, prog
.start
, re
);
1283 if (pc
is null) return null;
1285 if (pc
-prog
.start
!= n
) {
1286 //dd("buffer error: %d != %d", (int)(pc-prog.start), (int)n);
1290 prog
.len
= cast(uint)(pc
-prog
.start
);
1292 prog
.lookahead_asserts
= 0;
1293 prog
.dup_threads
= 0;
1294 prog
.uniq_threads
= 0;
1296 prog
.leading_bytes
= null;
1297 prog
.leading_byte
= -1;
1300 for (i
= 0; i
< prog
.nregexes
; ++i
) prog
.ovecsize
+= prog
.multi_ncaps
[i
]+1;
1301 prog
.ovecsize
*= 2*uint.sizeof
;
1303 if (getLeadingBytes(pool
, prog
, &prog
.leading_bytes
) == SRes
.Error
) return null;
1305 if (prog
.leading_bytes
&& prog
.leading_bytes
.next
is null) {
1306 pc
= cast(VMInstr
*)prog
.leading_bytes
.data
;
1307 if (pc
.opcode
== Opcode
.Char
) prog
.leading_byte
= pc
.ch
;
1310 //dd("nullable: %u", prog.nullable);
1314 for (cl
= prog
.leading_bytes
; cl
; cl
= cl
.next
) {
1316 fprintf(stderr
, "[");
1317 dump(stderr
, pc
, prog
.start
);
1318 fprintf(stderr
, "]");
1320 if (prog
.leading_bytes
) fprintf(stderr
, "\n");
1327 private int getLeadingBytes (MemPool pool
, Program
* prog
, Chain
** res
) {
1328 uint tag
= prog
.tag
+1;
1329 int rc
= getLeadingBytesHelper(pool
, prog
.start
, prog
, res
, tag
);
1331 if (rc
== SRes
.Error
) return SRes
.Error
;
1332 if (rc
== SRes
.NoMatch || prog
.nullable
) {
1334 return SRes
.NoMatch
;
1340 private int getLeadingBytesHelper (MemPool pool
, VMInstr
* pc
, Program
* prog
, Chain
** res
, uint tag
) {
1345 if (pc
.tag
== tag
) return SRes
.Ok
;
1347 if (pc
== prog
.start
+1) {
1348 // skip the dot (.) in the initial boilerplate ".*?"
1354 switch (pc
.opcode
) {
1356 rc
= getLeadingBytesHelper(pool
, pc
.x
, prog
, res
, tag
);
1357 if (rc
!= SRes
.Ok
) return rc
;
1358 return getLeadingBytesHelper(pool
, pc
.y
, prog
, res
, tag
);
1360 return getLeadingBytesHelper(pool
, pc
.x
, prog
, res
, tag
);
1362 if (++pc
== prog
.start
+prog
.len
) return SRes
.Ok
;
1363 return getLeadingBytesHelper(pool
, pc
, prog
, res
, tag
);
1368 if (++pc
== prog
.start
+prog
.len
) return SRes
.Ok
;
1369 return getLeadingBytesHelper(pool
, pc
, prog
, res
, tag
);
1372 return SRes
.NoMatch
;
1374 /* CHAR, ANY, IN, NOTIN */
1375 ncl
= pool
.alloc
!Chain
;
1376 if (ncl
is null) return SRes
.Error
;
1380 for (cl
= *res
; /* void */; cl
= cl
.next
) {
1381 bc
= cast(VMInstr
*)cl
.data
;
1382 if (bc
.opcode
== pc
.opcode
) {
1383 if (bc
.opcode
== Opcode
.Char
) {
1384 if (bc
.ch
== pc
.ch
) return SRes
.Ok
;
1387 if (cl
.next
is null) {
1400 private uint prgLength (RegExpPart
* r
) {
1401 //dd("program len on node: %d", (int)r.type);
1402 final switch (r
.type
) {
1403 case RegExpPart
.Type
.Alt
:
1404 return 2+r
.left
.prgLength
+r
.right
.prgLength
;
1405 case RegExpPart
.Type
.Cat
:
1406 return r
.left
.prgLength
+r
.right
.prgLength
;
1407 case RegExpPart
.Type
.Lit
:
1408 case RegExpPart
.Type
.Dot
:
1409 case RegExpPart
.Type
.DotML
:
1410 case RegExpPart
.Type
.Class
:
1411 case RegExpPart
.Type
.NClass
:
1413 case RegExpPart
.Type
.Paren
:
1414 return 2+r
.left
.prgLength
;
1415 case RegExpPart
.Type
.Quest
:
1416 return 1+r
.left
.prgLength
;
1417 case RegExpPart
.Type
.Star
:
1418 return 2+r
.left
.prgLength
;
1419 case RegExpPart
.Type
.Plus
:
1420 return 1+r
.left
.prgLength
;
1421 case RegExpPart
.Type
.Assert
:
1423 case RegExpPart
.Type
.TopLevel
:
1424 return 1+r
.left
.prgLength
;
1425 case RegExpPart
.Type
.Nil
:
1426 // impossible to reach here
1432 private VMInstr
* emit (MemPool pool
, VMInstr
* pc
, RegExpPart
* r
) {
1434 //dd("program emit bytecode on node: %d", (int)r.type);
1436 case RegExpPart
.Type
.Alt
:
1438 pc
.opcode
= Opcode
.Split
;
1441 pc
= emit(pool
, pc
, r
.left
);
1442 if (pc
is null) return null;
1444 pc
.opcode
= Opcode
.Jump
;
1447 pc
= emit(pool
, pc
, r
.right
);
1448 if (pc
is null) return null;
1451 case RegExpPart
.Type
.Cat
:
1453 pc
= emit(pool
, pc
, r
.left
);
1454 if (pc
is null) return null;
1456 pc
= emit(pool
, pc
, r
.right
);
1457 if (pc
is null) return null;
1459 case RegExpPart
.Type
.Lit
:
1460 pc
.opcode
= Opcode
.Char
;
1464 case RegExpPart
.Type
.Class
:
1465 pc
.opcode
= Opcode
.In
;
1466 if (addCharClass(pool
, pc
, r
.range
) != SRes
.Ok
) return null;
1469 case RegExpPart
.Type
.NClass
:
1470 pc
.opcode
= Opcode
.NotIn
;
1471 if (addCharClass(pool
, pc
, r
.range
) != SRes
.Ok
) return null;
1474 case RegExpPart
.Type
.Dot
:
1475 pc
.opcode
= Opcode
.Any
;
1478 case RegExpPart
.Type
.DotML
:
1479 pc
.opcode
= Opcode
.AnyML
;
1482 case RegExpPart
.Type
.Paren
:
1484 pc
.opcode
= Opcode
.Save
;
1485 pc
.group
= 2*r
.group
;
1487 pc
= emit(pool
, pc
, r
.left
);
1488 if (pc
is null) return null;
1489 pc
.opcode
= Opcode
.Save
;
1490 pc
.group
= 2*r
.group
+1;
1493 case RegExpPart
.Type
.Quest
:
1495 pc
.opcode
= Opcode
.Split
;
1498 pc
= emit(pool
, pc
, r
.left
);
1499 if (pc
is null) return null;
1508 case RegExpPart
.Type
.Star
:
1510 pc
.opcode
= Opcode
.Split
;
1513 pc
= emit(pool
, pc
, r
.left
);
1514 if (pc
is null) return null;
1516 pc
.opcode
= Opcode
.Jump
;
1527 case RegExpPart
.Type
.Plus
:
1530 pc
= emit(pool
, pc
, r
.left
);
1531 if (pc
is null) return null;
1533 pc
.opcode
= Opcode
.Split
;
1545 case RegExpPart
.Type
.Assert
:
1546 pc
.opcode
= Opcode
.Assert
;
1547 pc
.assertion
= r
.assertion
;
1550 case RegExpPart
.Type
.TopLevel
:
1551 pc
= emit(pool
, pc
, r
.left
);
1552 if (pc
is null) return null;
1553 pc
.opcode
= Opcode
.Match
;
1554 //dd("setting regex id %ld", (long) r.regex_id);
1555 pc
.regex_id
= r
.regex_id
;
1558 case RegExpPart
.Type
.Nil
:
1562 /* impossible to reach here */
1569 private int addCharClass (MemPool pool
, VMInstr
* pc
, RERange
* range
) {
1575 for (r
= range
; r
; r
= r
.next
) ++n
;
1577 p
= pool
.alloc
!char(cast(uint)RangesInVM
.sizeof
+n
*cast(uint)Range2VM
.sizeof
);
1578 if (p
is null) return SRes
.Error
;
1580 pc
.ranges
= cast(RangesInVM
*)p
;
1582 p
+= RangesInVM
.sizeof
;
1583 pc
.ranges
.head
= cast(Range2VM
*)p
;
1585 pc
.ranges
.count
= n
;
1587 for (i
= 0, r
= range
; r
; i
++, r
= r
.next
) {
1588 pc
.ranges
.head
[i
].from
= r
.from
;
1589 pc
.ranges
.head
[i
].to
= r
.to
;
1596 // ////////////////////////////////////////////////////////////////////////// //
1597 /// thompson virtual machine. used to check if re matches, but can't return position.
1598 public struct Thompson
{
1603 @property bool valid () const pure nothrow @safe @nogc { pragma(inline
, true); return pool
.active
; }
1605 static Thompson
create (RegExp rex
) {
1608 ThompsonThreadList
* clist
, nlist
;
1610 if (!rex
.valid
) return Thompson
.init
;
1614 vm
.pool
= MemPool
.create
;
1615 if (!vm
.pool
.active
) return Thompson
.init
;
1617 ctx
= vm
.pool
.alloc
!ThompsonCtx
;
1618 if (ctx
is null) { vm
.pool
.release
; return Thompson
.init
; }
1619 scope(exit
) ctx
.pool
.release
; // fixup rc
1622 assert(vm
.pool
.impl
.rc
== 2);
1623 ctx
.program
= rex
.reprg
;
1625 len
= rex
.reprg
.len
;
1627 clist
= createThreadList(vm
.pool
, len
);
1628 if (clist
is null) { ctx
.pool
.release
; vm
.pool
.release
; return Thompson
.init
; }
1630 ctx
.current_threads
= clist
;
1632 nlist
= createThreadList(vm
.pool
, len
);
1633 if (nlist
is null) { ctx
.pool
.release
; vm
.pool
.release
; return Thompson
.init
; }
1635 ctx
.next_threads
= nlist
;
1637 ctx
.tag
= rex
.reprg
.tag
+1;
1638 ctx
.first_buf
= true;
1639 assert(vm
.pool
.impl
.rc
== 2);
1644 int exec (const(char)[] input
, bool eof
=true) {
1645 if (!pool
.active
) return SRes
.Error
;
1646 auto ctx
= cast(ThompsonCtx
*)pool
.firstalloc
;
1648 scope(exit
) ctx
.pool
= MemPool
.init
; // fixup rc
1649 static if (input
.length
.sizeof
> 4) {
1650 if (input
.length
> int.max
) return SRes
.Error
;
1652 return execute(ctx
, input
.ptr
, cast(uint)input
.length
, eof
);
1657 struct ThompsonThread
{
1659 void* asserts_handler
;
1664 struct ThompsonThreadList
{
1666 ThompsonThread
[1] threads
;
1670 struct ThompsonCtx
{
1673 const(char)* buffer
;
1675 ThompsonThreadList
* current_threads
;
1676 ThompsonThreadList
* next_threads
;
1680 bool[1] threads_added
; // bit array
1684 int execute (ThompsonCtx
* ctx
, const(char)* input
, uint size
, bool eof
) {
1685 const(char)* sp
, last
;
1692 ThompsonThreadList
* clist
, nlist
, tmp
;
1695 clist
= ctx
.current_threads
;
1696 nlist
= ctx
.next_threads
;
1699 if (ctx
.first_buf
) {
1700 ctx
.first_buf
= false;
1701 addThread(ctx
, clist
, prog
.start
, input
);
1706 for (sp
= input
; sp
< last ||
(eof
&& sp
== last
); sp
++) {
1707 //dd("=== pos %d (char %d).\n", (int)(sp-input), (sp < last) ? (*sp&0xFF) : 0);
1708 if (clist
.count
== 0) break;
1709 /* printf("%d(%02x).", (int)(sp-input), *sp&0xFF); */
1711 for (i
= 0; i
< clist
.count
; i
++) {
1712 t
= clist
.threads
.ptr
+i
;
1714 //dd("--- #%u: pc %d: opcode %d\n", ctx.tag, (int)(pc-prog.start), pc.opcode);
1715 switch (pc
.opcode
) {
1717 if (sp
== last
) break;
1719 for (j
= 0; j
< pc
.ranges
.count
; j
++) {
1720 range
= pc
.ranges
.head
+j
;
1721 //dd("testing %d for [%d, %d] (%u)", *sp, (int)range.from, (int)range.to, (unsigned) j);
1722 if (*sp
>= range
.from
&& *sp
<= range
.to
) {
1728 addThread(ctx
, nlist
, pc
+1, sp
+1);
1731 if (sp
== last
) break;
1733 for (j
= 0; j
< pc
.ranges
.count
; j
++) {
1734 range
= pc
.ranges
.head
+j
;
1735 //dd("testing %d for [%d, %d] (%u)", *sp, (int)range.from, (int)range.to, (unsigned) j);
1736 if (*sp
>= range
.from
&& *sp
<= range
.to
) {
1742 addThread(ctx
, nlist
, pc
+1, sp
+1);
1745 if (sp
== last ||
*sp
!= pc
.ch
) break;
1746 addThread(ctx
, nlist
, pc
+1, sp
+1);
1749 if (sp
== last
) break;
1750 addThread(ctx
, nlist
, pc
+1, sp
+1);
1753 if (sp
== last ||
*sp
== '\n') break;
1754 addThread(ctx
, nlist
, pc
+1, sp
+1);
1757 switch (pc
.assertion
) {
1758 case RegExpPart
.AssType
.StreamEnd
:
1759 if (sp
!= last
) break;
1760 goto assertion_hold
;
1761 case RegExpPart
.AssType
.Eol
:
1762 if (sp
!= last
&& *sp
!= '\n') break;
1763 goto assertion_hold
;
1764 case RegExpPart
.AssType
.NonWord
:
1765 if (cast(ubyte)t
.seen_word ^
cast(ubyte)(sp
!= last
&& isWordChar(*sp
))) {
1766 //dd("\\B assertion failed: %u %c", t.seen_word, *sp);
1769 //dd("\\B assertion passed: %u %c", t.seen_word, *sp);
1770 goto assertion_hold
;
1771 case RegExpPart
.AssType
.Word
:
1772 //dd("seen word: %d, sp == last: %d, char=%d", t.seen_word, sp == last, sp == last ? 0 : *sp);
1773 if ((cast(ubyte)t
.seen_word ^
cast(ubyte)(sp
!= last
&& isWordChar(*sp
))) == 0) {
1774 //dd("\\b assertion failed: %u %c, cur is word: %d, " "pc=%d", (int)t.seen_word, sp == last ? 0 : *sp, sp != last && isWordChar(*sp), (int)(pc-ctx.program.start));
1777 //dd("\\b assertion passed: %u %c", (int)t.seen_word, sp != last ? *sp : 0);
1778 goto assertion_hold
;
1780 // impossible to reach here
1786 addThread(ctx
, clist
, pc
+1, sp
);
1794 * Jmp, Split, Save handled in addthread, so that
1795 * machine execution matches what a backtracker would do.
1796 * This is discussed (but not shown as code) in
1797 * Regular Expression Matching: the Virtual Machine Approach.
1807 if (sp
== last
) break;
1812 ctx
.current_threads
= clist
;
1813 ctx
.next_threads
= nlist
;
1815 if (eof
) return SRes
.NoMatch
;
1821 private void addThread (ThompsonCtx
* ctx
, ThompsonThreadList
* l
, VMInstr
* pc
, const(char)* sp
) {
1822 bool seen_word
= false;
1825 if (pc
.tag
== ctx
.tag
) return; // already on list
1829 switch (pc
.opcode
) {
1831 addThread(ctx
, l
, pc
.x
, sp
);
1834 addThread(ctx
, l
, pc
.x
, sp
);
1835 addThread(ctx
, l
, pc
.y
, sp
);
1838 addThread(ctx
, l
, pc
+1, sp
);
1841 switch (pc
.assertion
) {
1842 case RegExpPart
.AssType
.StreamStart
:
1843 if (sp
!= ctx
.buffer
) {
1844 //dd("\\A assertion failed: %d", (int)(sp-ctx.buffer));
1847 addThread(ctx
, l
, pc
+1, sp
);
1849 case RegExpPart
.AssType
.Bol
:
1850 if (sp
!= ctx
.buffer
&& sp
[-1] != '\n') return;
1851 addThread(ctx
, l
, pc
+1, sp
);
1853 case RegExpPart
.AssType
.Word
:
1854 case RegExpPart
.AssType
.NonWord
:
1855 seen_word
= (sp
!= ctx
.buffer
&& isWordChar(sp
[-1]));
1856 //dd("pc=%d, setting seen word: %u %c", (int)(pc-ctx.program.start), (int)seen_word, (sp != ctx.buffer) ? sp[-1] : 0);
1859 // postpone look-ahead assertions
1867 t
= l
.threads
.ptr
+l
.count
;
1869 t
.seen_word
= seen_word
;
1875 ThompsonThreadList
* createThreadList (MemPool pool
, uint size
) {
1876 auto l
= pool
.alloc
!ThompsonThreadList((size
-1)*cast(uint)ThompsonThread
.sizeof
);
1877 if (l
is null) return null;
1883 // ////////////////////////////////////////////////////////////////////////// //
1884 /// pike virtual machine. can return captures.
1885 public struct Pike
{
1887 align(1) static struct Capture
{
1889 int s
, e
; // starting and ending indicies
1890 @property bool empty () const pure nothrow @safe @nogc { pragma(inline
, true); return (s
< 0 || e
< 0 || s
>= e
); }
1897 @property bool valid () const pure nothrow @safe @nogc { pragma(inline
, true); return pool
.active
; }
1899 static Pike
create (RegExp rex
, Capture
[] ovector
) {
1901 PikeThreadList
* clist
, nlist
;
1903 if (!rex
.valid
) return Pike
.init
;
1907 vm
.pool
= MemPool
.create
;
1908 if (!vm
.pool
.active
) return Pike
.init
;
1910 ctx
= vm
.pool
.alloc
!PikeCtx
;
1911 if (ctx
is null) { vm
.pool
.release
; return Pike
.init
; }
1914 ctx
.program
= rex
.reprg
;
1915 ctx
.processed_bytes
= 0;
1916 ctx
.pending_ovector
= null;
1917 scope(exit
) ctx
.pool
.release
; // fixup rc
1919 clist
= treadListCreate(vm
.pool
);
1920 if (clist
is null) { ctx
.pool
.release
; vm
.pool
.release
; return Pike
.init
; }
1922 ctx
.current_threads
= clist
;
1924 nlist
= treadListCreate(vm
.pool
);
1925 if (nlist
is null) { ctx
.pool
.release
; vm
.pool
.release
; return Pike
.init
; }
1927 ctx
.next_threads
= nlist
;
1929 //ctx.program = rex.reprg;
1931 ctx
.free_capture
= null;
1932 ctx
.free_threads
= null;
1935 if (ovector
.length
> 0) {
1936 ctx
.ovector
= cast(int*)ovector
.ptr
;
1937 ctx
.ovecsize
= cast(int)ovector
.length
*2;
1939 ctx
.ovector
= ctx
.pool
.alloc
!int(int.sizeof
); // 2 ints
1940 if (ctx
.ovector
is null) { ctx
.pool
.release
; vm
.pool
.release
; return Pike
.init
; }
1944 //dd("resetting seen start state");
1945 ctx
.seen_start_state
= false;
1946 ctx
.initial_states_count
= 0;
1947 ctx
.initial_states
= null;
1948 ctx
.first_buf
= true;
1950 ctx
.empty_capture
= false;
1951 ctx
.seen_newline
= false;
1952 ctx
.seen_word
= false;
1957 int exec (const(char)[] input
, bool eof
=true) {
1958 if (!pool
.active
) return SRes
.Error
;
1959 auto ctx
= cast(PikeCtx
*)pool
.firstalloc
;
1961 scope(exit
) ctx
.pool
.release
; // fixup rc
1962 static if (input
.length
.sizeof
> 4) {
1963 if (input
.length
> int.max
) return SRes
.Error
;
1965 return execute(ctx
, input
.ptr
, cast(uint)input
.length
, eof
, null);
1971 enum PikeFreeThreadCtxT
= "t.next = ctx.free_threads; ctx.free_threads = t;";
1976 CaptureRec
* capture
;
1982 struct PikeThreadList
{
1991 int processed_bytes
;
1992 const(char)* buffer
;
1995 CaptureRec
* matched
;
1996 CaptureRec
* free_capture
;
1997 PikeThread
* free_threads
;
1999 int* pending_ovector
;
2003 PikeThreadList
* current_threads
;
2004 PikeThreadList
*next_threads
;
2006 int last_matched_pos
; // the pos for the last (partial) match
2008 VMInstr
** initial_states
;
2009 uint initial_states_count
;
2012 bool seen_start_state
;
2020 int execute (PikeCtx
* ctx
, const(char)* input
, uint size
, bool eof
, int** pending_matched
) {
2021 const(char)* sp
, last
, p
;
2024 bool seen_word
, in_
;
2027 CaptureRec
* cap
, matched
;
2031 PikeThreadList
* clist
, nlist
, tmp
;
2032 PikeThreadList list
;
2041 clist
= ctx
.current_threads
;
2042 nlist
= ctx
.next_threads
;
2043 matched
= ctx
.matched
;
2046 ctx
.last_matched_pos
= -1;
2048 if (ctx
.empty_capture
) {
2049 //dd("found empty capture");
2050 ctx
.empty_capture
= false;
2054 return SRes
.NoMatch
;
2065 //dd("processing buffer size %d", (int)size);
2067 if (ctx
.first_buf
) {
2068 ctx
.first_buf
= false;
2070 cap
= captureCreate(pool
, prog
.ovecsize
, 1, &ctx
.free_capture
);
2071 if (cap
is null) return SRes
.Error
;
2073 ctx
.tag
= prog
.tag
+1;
2074 rc
= addThread(ctx
, clist
, prog
.start
, cap
, cast(int)(sp
-input
), null);
2075 if (rc
!= SRes
.Ok
) {
2080 ctx
.initial_states_count
= clist
.count
;
2081 ctx
.initial_states
= pool
.alloc
!(VMInstr
*)(cast(uint)(VMInstr
*).sizeof
*clist
.count
); //k8: -1 is safe here
2082 if (ctx
.initial_states
is null) return SRes
.Error
;
2084 /* we skip the last thread because it must always be .*? */
2085 for (i
= 0, t
= clist
.head
; t
&& t
.next
; i
++, t
= t
.next
) {
2086 ctx
.initial_states
[i
] = t
.pc
;
2092 for (; sp
< last ||
(eof
&& sp
== last
); sp
++) {
2093 //dd("=== pos %d, offset %d (char '%c' (%d)).\n", (int)(sp-input+ctx.processed_bytes), (int)(sp-input), sp < last ? *sp : '?', sp < last ? *sp : 0);
2094 if (clist
.head
is null) {
2095 //dd("clist empty. abort.");
2100 fprintf(stderr
, "sregex: cur list:");
2101 for (t
= clist
.head
; t
; t
= t
.next
) fprintf(stderr
, " %d", cast(int)(t
.pc
-prog
.start
));
2102 fprintf(stderr
, "\n");
2105 //dd("seen start state: %d", (int)ctx.seen_start_state);
2107 if (prog
.leading_bytes
&& ctx
.seen_start_state
) {
2108 //dd("resetting seen start state");
2109 ctx
.seen_start_state
= false;
2111 if (sp
== last || clist
.count
!= ctx
.initial_states_count
) {
2112 //dd("skip because sp == last or clist.count != initial states count!");
2113 goto run_cur_threads
;
2116 for (i
= 0, t
= clist
.head
; t
&& t
.next
; i
++, t
= t
.next
) {
2117 if (t
.pc
!= ctx
.initial_states
[i
]) {
2118 //dd("skip because pc %d unmatched: %d != %d", (int)i, (int)(t.pc-prog.start), (int)(ctx.initial_states[i]-prog.start));
2119 goto run_cur_threads
;
2123 //dd("XXX found initial state to do first byte search!");
2124 p
= findFirstByte(sp
, last
, prog
.leading_byte
, prog
.leading_bytes
);
2127 //dd("XXX moved sp by %d bytes", (int)(p-sp));
2129 clearThreadList(ctx
, clist
);
2130 cap
= captureCreate(pool
, prog
.ovecsize
, 1, &ctx
.free_capture
);
2131 if (cap
is null) return SRes
.Error
;
2133 rc
= addThread(ctx
, clist
, prog
.start
, cap
, cast(int)(sp
-input
), null);
2134 if (rc
!= SRes
.Ok
) {
2138 if (sp
== last
) break;
2144 while (clist
.head
) {
2146 clist
.head
= t
.next
;
2153 fprintf(stderr, "--- #%u", ctx.tag);
2154 dump(stderr, pc, prog.start);
2155 fprintf(stderr, "\n");
2158 switch (pc
.opcode
) {
2165 for (i
= 0; i
< pc
.ranges
.count
; i
++) {
2166 range
= pc
.ranges
.head
+i
;
2167 //dd("testing %d for [%d, %d] (%u)", *sp, (int)range.from, (int)range.to, (unsigned) i);
2168 if (*sp
>= range
.from
&& *sp
<= range
.to
) {
2177 rc
= addThread(ctx
, nlist
, pc
+1, cap
, cast(int)(sp
-input
+1), &cap
);
2178 if (rc
== SRes
.Done
) goto matched
;
2179 if (rc
!= SRes
.Ok
) {
2190 for (i
= 0; i
< pc
.ranges
.count
; i
++) {
2191 range
= pc
.ranges
.head
+i
;
2192 //dd("testing %d for [%d, %d] (%u)", *sp, (int)range.from, (int)range.to, (unsigned) i);
2193 if (*sp
>= range
.from
&& *sp
<= range
.to
) {
2202 rc
= addThread(ctx
, nlist
, pc
+1, cap
, cast(int)(sp
-input
+1), &cap
);
2203 if (rc
== SRes
.Done
) goto matched
;
2204 if (rc
!= SRes
.Ok
) {
2210 //dd("matching char '%c' (%d) against %d", sp != last ? *sp : '?', sp != last ? *sp : 0, pc.ch);
2211 if (sp
== last ||
*sp
!= pc
.ch
) {
2215 rc
= addThread(ctx
, nlist
, pc
+1, cap
, cast(int)(sp
-input
+1), &cap
);
2216 if (rc
== SRes
.Done
) goto matched
;
2217 if (rc
!= SRes
.Ok
) {
2223 if (sp
== last ||
*sp
== '\n') {
2233 rc
= addThread(ctx
, nlist
, pc
+1, cap
, cast(int)(sp
-input
+1), &cap
);
2234 if (rc
== SRes
.Done
) goto matched
;
2235 if (rc
!= SRes
.Ok
) {
2241 switch (pc
.assertion
) {
2242 case RegExpPart
.AssType
.StreamEnd
:
2243 if (sp
!= last
) break;
2244 goto assertion_hold
;
2245 case RegExpPart
.AssType
.Eol
:
2246 if (sp
!= last
&& *sp
!= '\n') break;
2247 //dd("dollar $ assertion hold: pos=%d", (int)(sp-input+ctx.processed_bytes));
2248 goto assertion_hold
;
2249 case RegExpPart
.AssType
.NonWord
:
2250 seen_word
= (t
.seen_word ||
(sp
== input
&& ctx
.seen_word
));
2251 if (cast(ubyte)seen_word ^
cast(ubyte)(sp
!= last
&& isWordChar(*sp
))) break;
2252 //dd("\\B assertion passed: %u %c", t.seen_word, *sp);
2253 goto assertion_hold
;
2254 case RegExpPart
.AssType
.Word
:
2255 seen_word
= (t
.seen_word ||
(sp
== input
&& ctx
.seen_word
));
2256 if ((cast(ubyte)seen_word ^
cast(ubyte)(sp
!= last
&& isWordChar(*sp
))) == 0) break;
2257 goto assertion_hold
;
2259 /* impossible to reach here */
2267 rc
= addThread(ctx
, &list
, pc
+1, cap
, cast(int)(sp
-input
), null);
2268 if (rc
!= SRes
.Ok
) {
2269 prog
.tag
= ctx
.tag
+1;
2273 *list
.next
= clist
.head
;
2274 clist
.head
= list
.head
;
2275 clist
.count
+= list
.count
;
2278 //dd("sp+1 == last: %d, eof: %u", sp+1 == last, eof);
2281 ctx
.last_matched_pos
= cap
.vector
[1];
2282 cap
.regex_id
= pc
.regex_id
;
2285 //dd("discarding match: ");
2286 ctx
.decref(matched
);
2288 //dd("set matched, regex id: %d", (int)pc.regex_id);
2290 //PikeFreeThreadCtxT(ctx, t);
2291 mixin(PikeFreeThreadCtxT
);
2292 clearThreadList(ctx
, clist
);
2295 * Jmp, Split, Save handled in addthread, so that
2296 * machine execution matches what a backtracker would do.
2297 * This is discussed (but not shown as code) in
2298 * Regular Expression Matching: the Virtual Machine Approach.
2301 /* impossible to reach here */
2304 //PikeFreeThreadCtxT(ctx, t);
2305 mixin(PikeFreeThreadCtxT
);
2311 if (nlist
.head
) clearThreadList(ctx
, nlist
);
2312 if (sp
== last
) break;
2315 //dd("matched: %p, clist: %p, pos: %d", matched, clist.head, (int)(ctx.processed_bytes+(sp-input)));
2317 if (ctx
.last_matched_pos
>= 0) {
2318 p
= input
+ctx
.last_matched_pos
-ctx
.processed_bytes
;
2320 //dd("diff: %d", (int)(ctx.last_matched_pos-ctx.processed_bytes));
2321 //dd("p=%p, input=%p", p, ctx.buffer);
2322 ctx
.seen_newline
= (p
[-1] == '\n');
2323 ctx
.seen_word
= isWordChar(p
[-1]);
2324 //dd("set seen newline: %u", ctx.seen_newline);
2325 //dd("set seen word: %u", ctx.seen_word);
2327 ctx
.last_matched_pos
= -1;
2331 ctx
.current_threads
= clist
;
2332 ctx
.next_threads
= nlist
;
2335 if (eof || clist
.head
is null) {
2336 if (prepareMatchedCaptures(ctx
, matched
, ctx
.ovector
, ctx
.ovecsize
, true) != SRes
.Ok
) return SRes
.Error
;
2339 *clist
.next
= ctx
.free_threads
;
2340 ctx
.free_threads
= clist
.head
;
2346 ctx
.processed_bytes
= ctx
.ovector
[1];
2347 ctx
.empty_capture
= (ctx
.ovector
[0] == ctx
.ovector
[1]);
2350 ctx
.first_buf
= true;
2352 rc
= matched
.regex_id
;
2353 ctx
.decref(matched
);
2355 //dd("set empty capture: %u", ctx.empty_capture);
2360 //dd("clist head cap == matched: %d", clist.head.capture == matched);
2362 if (pending_matched
) {
2363 if (ctx
.pending_ovector
is null) {
2364 ctx
.pending_ovector
= pool
.alloc
!int(2*cast(uint)int.sizeof
); //k8: -1 is safe here
2365 if (ctx
.pending_ovector
is null) return SRes
.Error
;
2367 *pending_matched
= ctx
.pending_ovector
;
2368 if (prepareMatchedCaptures(ctx
, matched
, *pending_matched
, 2, false) != SRes
.Ok
) return SRes
.Error
;
2374 return SRes
.NoMatch
;
2376 if (pending_matched
) *pending_matched
= null;
2379 ctx
.processed_bytes
+= cast(int)(sp
-input
);
2381 //dd("processed bytes: %u", (unsigned) ctx.processed_bytes);
2383 ctx
.matched
= matched
;
2385 prepareTempCaptures(prog
, ctx
);
2391 private void prepareTempCaptures (Program
* prog
, PikeCtx
* ctx
) {
2397 ctx
.ovector
[0] = -1;
2398 ctx
.ovector
[1] = -1;
2400 for (t
= ctx
.current_threads
.head
; t
; t
= t
.next
) {
2403 foreach (int i
; 0..prog
.nregexes
) {
2405 b
= cap
.vector
[ofs
+0];
2406 //dd("%d: %d . %d", (int)0, (int)b, (int)a);
2407 if (b
!= -1 && (a
== -1 || b
< a
)) {
2408 //dd("setting group %d to %d", (int)0, (int)cap.vector[0]);
2411 a
= ctx
.ovector
[0+1];
2412 b
= cap
.vector
[0+1];
2413 //dd("%d: %d . %d", (int)(0+1), (int)b, (int)a);
2414 if (b
!= -1 && (a
== -1 || b
> a
)) {
2415 //dd("setting group %d to %d", (int)(0+1), (int)cap.vector[0+1]);
2416 ctx
.ovector
[0+1] = b
;
2418 ofs
+= 2*(prog
.multi_ncaps
[i
]+1);
2424 private PikeThreadList
* treadListCreate (MemPool pool
) {
2425 auto l
= pool
.alloc
!PikeThreadList
;
2426 if (l
is null) return null;
2434 private int addThread (PikeCtx
* ctx
, PikeThreadList
* l
, VMInstr
* pc
, CaptureRec
* capture
, int pos
, CaptureRec
** pcap
) {
2438 bool seen_word
= false;
2440 if (pc
.tag
== ctx
.tag
) {
2441 //dd("pc %d: already on list: %d", (int)(pc-ctx.program.start), pc.tag);
2442 if (pc
.opcode
== Opcode
.Split
) {
2443 if (pc
.y
.tag
!= ctx
.tag
) {
2444 if (pc
== ctx
.program
.start
) {
2445 //dd("setting seen start state");
2446 ctx
.seen_start_state
= true;
2448 return addThread(ctx
, l
, pc
.y
, capture
, pos
, pcap
);
2454 //dd("adding thread: pc %d, bytecode %d", (int)(pc-ctx.program.start), pc.opcode);
2458 switch (pc
.opcode
) {
2460 return addThread(ctx
, l
, pc
.x
, capture
, pos
, pcap
);
2463 if (pc
== ctx
.program
.start
) {
2464 //dd("setting seen start state");
2465 ctx
.seen_start_state
= true;
2470 rc
= addThread(ctx
, l
, pc
.x
, capture
, pos
, pcap
);
2471 if (rc
!= SRes
.Ok
) {
2476 return addThread(ctx
, l
, pc
.y
, capture
, pos
, pcap
);
2479 //dd("save %u: processed bytes: %u, pos: %u", (unsigned) pc.group, (unsigned) ctx.processed_bytes, (unsigned) pos);
2480 cap
= captureUpdate(ctx
.pool
, capture
, pc
.group
, ctx
.processed_bytes
+pos
, &ctx
.free_capture
);
2481 if (cap
is null) return SRes
.Error
;
2482 return addThread(ctx
, l
, pc
+1, cap
, pos
, pcap
);
2485 switch (pc
.assertion
) {
2486 case RegExpPart
.AssType
.StreamStart
:
2487 if (pos || ctx
.processed_bytes
) break;
2488 return addThread(ctx
, l
, pc
+1, capture
, pos
, pcap
);
2490 case RegExpPart
.AssType
.Bol
:
2491 //dd("seen newline: %u", ctx.seen_newline);
2493 if (ctx
.processed_bytes
&& !ctx
.seen_newline
) break;
2495 if (ctx
.buffer
[pos
-1] != '\n') break;
2497 //dd("newline assertion hold");
2498 return addThread(ctx
, l
, pc
+1, capture
, pos
, pcap
);
2500 case RegExpPart
.AssType
.Word
:
2501 case RegExpPart
.AssType
.NonWord
:
2505 char c
= ctx
.buffer
[pos
-1];
2506 seen_word
= isWordChar(c
);
2511 /* postpone look-ahead assertions */
2517 ctx
.last_matched_pos
= capture
.vector
[1];
2518 capture
.regex_id
= pc
.regex_id
;
2523 goto default; //k8:???
2527 if (ctx
.free_threads
) {
2528 /* fprintf(stderr, "reusing free thread\n"); */
2529 t
= ctx
.free_threads
;
2530 ctx
.free_threads
= t
.next
;
2533 /* fprintf(stderr, "creating new thread\n"); */
2534 t
= ctx
.pool
.alloc
!PikeThread
;
2535 if (t
is null) return SRes
.Error
;
2539 t
.capture
= capture
;
2541 t
.seen_word
= seen_word
;
2543 if (l
.head
is null) {
2552 //dd("added thread: pc %d, bytecode %d", (int)(pc-ctx.program.start), pc.opcode);
2561 private int prepareMatchedCaptures (PikeCtx
* ctx
, CaptureRec
* matched
, int* ovector
, uint ovecsize
, bool complete
) {
2562 import core
.stdc
.string
: memcpy
, memset
;
2564 Program
* prog
= ctx
.program
;
2568 if (matched
.regex_id
>= prog
.nregexes
) {
2569 //dd("bad regex id: %ld >= %ld", (long) matched.regex_id, (long) prog.nregexes);
2573 if (ovecsize
== 0) return SRes
.Ok
;
2575 foreach (uint i
; 0..matched
.regex_id
) ofs
+= prog
.multi_ncaps
[i
]+1;
2580 len
= 2*(prog
.multi_ncaps
[matched
.regex_id
]+1);
2584 if (len
> ovecsize
) len
= ovecsize
;
2586 //dd("ncaps for regex %d: %d", (int)i, (int)prog.multi_ncaps[i]);
2587 //dd("matched captures: ofs: %d, len: %d", (int)ofs, (int)(len/sizeof(int)));
2589 memcpy(ovector
, matched
.vector
+ofs
, len
*int.sizeof
);
2591 if (!complete
) return SRes
.Ok
;
2593 if (ovecsize
> len
) memset(cast(char*)ovector
+len
*int.sizeof
, -1, (ctx
.ovecsize
-len
)*int.sizeof
);
2599 private const(char)* findFirstByte (const(char)* pos
, const(char)* last
, int leading_byte
, Chain
* leading_bytes
) {
2606 // optimize for single CHAR bc
2607 if (leading_byte
!= -1) {
2608 import core
.stdc
.string
: memchr
;
2609 pos
= cast(char*)memchr(pos
, leading_byte
, last
-pos
);
2610 if (pos
is null) return last
;
2614 for (; pos
!= last
; pos
++) {
2615 for (cl
= leading_bytes
; cl
; cl
= cl
.next
) {
2616 pc
= cast(VMInstr
*)cl
.data
;
2617 switch (pc
.opcode
) {
2619 if (*pos
== pc
.ch
) return pos
;
2622 for (i
= 0; i
< pc
.ranges
.count
; i
++) {
2623 range
= pc
.ranges
.head
+i
;
2624 if (*pos
>= range
.from
&& *pos
<= range
.to
) return pos
;
2629 for (i
= 0; i
< pc
.ranges
.count
; i
++) {
2630 range
= pc
.ranges
.head
+i
;
2631 if (*pos
>= range
.from
&& *pos
<= range
.to
) {
2636 if (!in_
) return pos
;
2649 private void clearThreadList (PikeCtx
* ctx
, PikeThreadList
* list
) {
2655 ctx
.decref(t
.capture
);
2656 //PikeFreeThreadCtxT(ctx, t);
2657 mixin(PikeFreeThreadCtxT
);
2659 assert(list
.count
== 0);
2663 void decref (PikeCtx
* ctx
, CaptureRec
* cap
) {
2664 pragma(inline
, true);
2665 if (--cap
.rc
== 0) {
2666 cap
.next
= ctx
.free_capture
;
2667 ctx
.free_capture
= cap
;
2672 private CaptureRec
* captureCreate (MemPool pool
, uint ovecsize
, uint clear
, CaptureRec
** freecap
) {
2676 //dd("reusing cap %p", *freecap);
2678 *freecap
= cap
.next
;
2682 //pragma(msg, CaptureRec.sizeof);
2683 //writeln(ovecsize);
2684 p
= pool
.alloc
!char(cast(uint)CaptureRec
.sizeof
+ovecsize
);
2685 if (p
is null) return null;
2686 cap
= cast(CaptureRec
*)p
;
2687 cap
.ovecsize
= ovecsize
;
2691 p
+= CaptureRec
.sizeof
;
2692 cap
.vector
= cast(int*)p
;
2695 import core
.stdc
.string
: memset
;
2696 memset(cap
.vector
, -1, ovecsize
);
2702 private CaptureRec
* captureUpdate (MemPool pool
, CaptureRec
* cap
, uint group
, int pos
, CaptureRec
** freecap
) {
2704 //dd("update cap %u to %d", group, pos);
2706 import core
.stdc
.string
: memcpy
;
2707 newcap
= captureCreate(pool
, cap
.ovecsize
, 0, freecap
);
2708 if (newcap
is null) return null;
2709 memcpy(newcap
.vector
, cap
.vector
, cap
.ovecsize
);
2711 //dd("!! cap %p: set group %u to %d", newcap, group, pos);
2712 newcap
.vector
[group
] = pos
;
2715 //dd("!! cap %p: set group %u to %d", cap, group, pos);
2716 cap
.vector
[group
] = pos
;