4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** The code in this file implements a compact but reasonably
14 ** efficient regular-expression matcher for posix extended regular
15 ** expressions against UTF8 text.
17 ** This file is an SQLite extension. It registers a single function
18 ** named "regexp(A,B)" where A is the regular expression and B is the
19 ** string to be matched. By registering this function, SQLite will also
20 ** then implement the "B regexp A" operator. Note that with the function
21 ** the regular expression comes first, but with the operator it comes second.
24 ** The following regular expression syntax is supported:
26 ** X* zero or more occurrences of X
27 ** X+ one or more occurrences of X
28 ** X? zero or one occurrences of X
29 ** X{p,q} between p and q occurrences of X
32 ** ^X X occurring at the beginning of the string
33 ** X$ X occurring at the end of the string
34 ** . Match any single character
35 ** \c Character c where c is one of \{}()[]|*+?.
36 ** \c C-language escapes for c in afnrtv. ex: \t or \n
37 ** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX
38 ** \xXX Where XX is exactly 2 hex digits, unicode value XX
39 ** [abc] Any single character from the set abc
40 ** [^abc] Any single character not in the set abc
41 ** [a-z] Any single character in the range a-z
42 ** [^a-z] Any single character not in the range a-z
44 ** \w Word character. [A-Za-z0-9_]
45 ** \W Non-word character
48 ** \s Whitespace character
49 ** \S Non-whitespace character
51 ** A nondeterministic finite automaton (NFA) is used for matching, so the
52 ** performance is bounded by O(N*M) where N is the size of the regular
53 ** expression and M is the size of the input string. The matcher never
54 ** exhibits exponential behavior. Note that the X{p,q} operator expands
55 ** to p copies of X followed by q-p copies of X? and that the size of the
56 ** regular expression in the O(N*M) performance bound is computed after this expansion.
61 #include "sqlite3ext.h"
62 SQLITE_EXTENSION_INIT1
/*
** The following #defines change the names of some functions implemented in
** this file to prevent name collisions with C-library functions of the
** same name.  Every later use of re_match, re_compile or re_free in this
** file therefore refers to the sqlite3re_-prefixed symbol.
*/
#define re_match   sqlite3re_match
#define re_compile sqlite3re_compile
#define re_free    sqlite3re_free
/* The end-of-input character.  The character readers return this value
** once the input is exhausted. */
#define RE_EOF            0    /* End of input */
/* The NFA is implemented as a sequence of opcodes taken from the following
** set.  Each opcode has a single integer argument.
**
** NOTE(review): the matcher computes jump targets as x+aArg[x], so the
** argument of the FORK and GOTO opcodes appears to be an offset relative
** to the opcode itself rather than an absolute index - confirm.
*/
#define RE_OP_MATCH       1    /* Match the one character in the argument */
#define RE_OP_ANY         2    /* Match any one character.  (Implements ".") */
#define RE_OP_ANYSTAR     3    /* Special optimized version of .* */
#define RE_OP_FORK        4    /* Continue to both next and opcode at iArg */
#define RE_OP_GOTO        5    /* Jump to opcode at iArg */
#define RE_OP_ACCEPT      6    /* Halt and indicate a successful match */
#define RE_OP_CC_INC      7    /* Beginning of a [...] character class */
#define RE_OP_CC_EXC      8    /* Beginning of a [^...] character class */
#define RE_OP_CC_VALUE    9    /* Single value in a character class */
#define RE_OP_CC_RANGE   10    /* Range of values in a character class */
#define RE_OP_WORD       11    /* Perl word character [A-Za-z0-9_] */
#define RE_OP_NOTWORD    12    /* Not a perl word character */
#define RE_OP_DIGIT      13    /* Digit: [0-9] */
#define RE_OP_NOTDIGIT   14    /* Not a digit */
#define RE_OP_SPACE      15    /* Space: [ \t\n\r\v\f] */
#define RE_OP_NOTSPACE   16    /* Not a space */
#define RE_OP_BOUNDARY   17    /* Boundary between word and non-word */
/* Each opcode is a "state" in the NFA.  A state is identified by the
** index of its opcode, stored in this compact unsigned type. */
typedef unsigned short ReStateNumber;
100 /* Because this is an NFA and not a DFA, multiple states can be active at
101 ** once. An instance of the following object records all active states in
102 ** the NFA. The implementation is optimized for the common case where the
103 ** number of actives states is small.
105 typedef struct ReStateSet
{
106 unsigned nState
; /* Number of current states */
107 ReStateNumber
*aState
; /* Current states */
/* An input string read one character at a time.
**
** z[] holds the bytes, i is the offset of the next unread byte, and the
** input is exhausted once i>=mx.
*/
typedef struct ReInput ReInput;
struct ReInput {
  const unsigned char *z;  /* All text */
  int i;                   /* Next byte to read */
  int mx;                  /* EOF when i>=mx */
};
119 /* A compiled NFA (or an NFA that is in the process of being compiled) is
120 ** an instance of the following object.
122 typedef struct ReCompiled ReCompiled
;
124 ReInput sIn
; /* Regular expression text */
125 const char *zErr
; /* Error message to return */
126 char *aOp
; /* Operators for the virtual machine */
127 int *aArg
; /* Arguments to each operator */
128 unsigned (*xNextChar
)(ReInput
*); /* Next character function */
129 unsigned char zInit
[12]; /* Initial text to match */
130 int nInit
; /* Number of characters in zInit */
131 unsigned nState
; /* Number of entries in aOp[] and aArg[] */
132 unsigned nAlloc
; /* Slots allocated for aOp[] and aArg[] */
135 /* Add a state to the given state set if it is not already there */
136 static void re_add_state(ReStateSet
*pSet
, int newState
){
138 for(i
=0; i
<pSet
->nState
; i
++) if( pSet
->aState
[i
]==newState
) return;
139 pSet
->aState
[pSet
->nState
++] = (ReStateNumber
)newState
;
142 /* Extract the next unicode character from *pzIn and return it. Advance
143 ** *pzIn to the first byte past the end of the character returned. To
144 ** be clear: this routine converts utf8 to unicode. This routine is
145 ** optimized for the common case where the next character is a single byte.
147 static unsigned re_next_char(ReInput
*p
){
149 if( p
->i
>=p
->mx
) return 0;
152 if( (c
&0xe0)==0xc0 && p
->i
<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80 ){
153 c
= (c
&0x1f)<<6 | (p
->z
[p
->i
++]&0x3f);
154 if( c
<0x80 ) c
= 0xfffd;
155 }else if( (c
&0xf0)==0xe0 && p
->i
+1<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80
156 && (p
->z
[p
->i
+1]&0xc0)==0x80 ){
157 c
= (c
&0x0f)<<12 | ((p
->z
[p
->i
]&0x3f)<<6) | (p
->z
[p
->i
+1]&0x3f);
159 if( c
<=0x7ff || (c
>=0xd800 && c
<=0xdfff) ) c
= 0xfffd;
160 }else if( (c
&0xf8)==0xf0 && p
->i
+3<p
->mx
&& (p
->z
[p
->i
]&0xc0)==0x80
161 && (p
->z
[p
->i
+1]&0xc0)==0x80 && (p
->z
[p
->i
+2]&0xc0)==0x80 ){
162 c
= (c
&0x07)<<18 | ((p
->z
[p
->i
]&0x3f)<<12) | ((p
->z
[p
->i
+1]&0x3f)<<6)
163 | (p
->z
[p
->i
+2]&0x3f);
165 if( c
<=0xffff || c
>0x10ffff ) c
= 0xfffd;
172 static unsigned re_next_char_nocase(ReInput
*p
){
173 unsigned c
= re_next_char(p
);
174 if( c
>='A' && c
<='Z' ) c
+= 'a' - 'A';
/* Return true if c is a perl "word" character: [A-Za-z0-9_]
** Only ASCII letters, digits and underscore qualify; every other value,
** including 0 and anything above 0x7f, yields false. */
static int re_word_char(int c){
  return (c>='0' && c<='9') || (c>='a' && c<='z')
      || (c>='A' && c<='Z') || c=='_';
}
/* Return true if c is a "digit" character: [0-9]
** Only the ten ASCII decimal digits qualify. */
static int re_digit_char(int c){
  return (c>='0' && c<='9');
}
/* Return true if c is a perl "space" character: [ \t\r\n\v\f]
** The end-of-input value 0 is not a space character. */
static int re_space_char(int c){
  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
}
194 /* Run a compiled regular expression on the zero-terminated input
195 ** string zIn[]. Return true on a match and false if there is no match.
197 static int re_match(ReCompiled
*pRe
, const unsigned char *zIn
, int nIn
){
198 ReStateSet aStateSet
[2], *pThis
, *pNext
;
199 ReStateNumber aSpace
[100];
200 ReStateNumber
*pToFree
;
202 unsigned int iSwap
= 0;
210 in
.mx
= nIn
>=0 ? nIn
: (int)strlen((char const*)zIn
);
212 /* Look for the initial prefix match, if there is one. */
214 unsigned char x
= pRe
->zInit
[0];
215 while( in
.i
+pRe
->nInit
<=in
.mx
217 strncmp((const char*)zIn
+in
.i
, (const char*)pRe
->zInit
, pRe
->nInit
)!=0)
221 if( in
.i
+pRe
->nInit
>in
.mx
) return 0;
224 if( pRe
->nState
<=(sizeof(aSpace
)/(sizeof(aSpace
[0])*2)) ){
226 aStateSet
[0].aState
= aSpace
;
228 pToFree
= sqlite3_malloc64( sizeof(ReStateNumber
)*2*pRe
->nState
);
229 if( pToFree
==0 ) return -1;
230 aStateSet
[0].aState
= pToFree
;
232 aStateSet
[1].aState
= &aStateSet
[0].aState
[pRe
->nState
];
233 pNext
= &aStateSet
[1];
235 re_add_state(pNext
, 0);
236 while( c
!=RE_EOF
&& pNext
->nState
>0 ){
238 c
= pRe
->xNextChar(&in
);
240 pNext
= &aStateSet
[iSwap
];
243 for(i
=0; i
<pThis
->nState
; i
++){
244 int x
= pThis
->aState
[i
];
245 switch( pRe
->aOp
[x
] ){
247 if( pRe
->aArg
[x
]==c
) re_add_state(pNext
, x
+1);
251 if( c
!=0 ) re_add_state(pNext
, x
+1);
255 if( re_word_char(c
) ) re_add_state(pNext
, x
+1);
258 case RE_OP_NOTWORD
: {
259 if( !re_word_char(c
) && c
!=0 ) re_add_state(pNext
, x
+1);
263 if( re_digit_char(c
) ) re_add_state(pNext
, x
+1);
266 case RE_OP_NOTDIGIT
: {
267 if( !re_digit_char(c
) && c
!=0 ) re_add_state(pNext
, x
+1);
271 if( re_space_char(c
) ) re_add_state(pNext
, x
+1);
274 case RE_OP_NOTSPACE
: {
275 if( !re_space_char(c
) && c
!=0 ) re_add_state(pNext
, x
+1);
278 case RE_OP_BOUNDARY
: {
279 if( re_word_char(c
)!=re_word_char(cPrev
) ) re_add_state(pThis
, x
+1);
282 case RE_OP_ANYSTAR
: {
283 re_add_state(pNext
, x
);
284 re_add_state(pThis
, x
+1);
288 re_add_state(pThis
, x
+pRe
->aArg
[x
]);
289 re_add_state(pThis
, x
+1);
293 re_add_state(pThis
, x
+pRe
->aArg
[x
]);
302 /* fall-through */ goto re_op_cc_inc
;
304 case RE_OP_CC_INC
: re_op_cc_inc
: {
306 int n
= pRe
->aArg
[x
];
308 for(j
=1; j
>0 && j
<n
; j
++){
309 if( pRe
->aOp
[x
+j
]==RE_OP_CC_VALUE
){
310 if( pRe
->aArg
[x
+j
]==c
){
315 if( pRe
->aArg
[x
+j
]<=c
&& pRe
->aArg
[x
+j
+1]>=c
){
323 if( pRe
->aOp
[x
]==RE_OP_CC_EXC
) hit
= !hit
;
324 if( hit
) re_add_state(pNext
, x
+n
);
330 for(i
=0; i
<pNext
->nState
; i
++){
331 if( pRe
->aOp
[pNext
->aState
[i
]]==RE_OP_ACCEPT
){ rc
= 1; break; }
334 sqlite3_free(pToFree
);
338 /* Resize the opcode and argument arrays for an RE under construction.
340 static int re_resize(ReCompiled
*p
, int N
){
343 aOp
= sqlite3_realloc64(p
->aOp
, N
*sizeof(p
->aOp
[0]));
344 if( aOp
==0 ) return 1;
346 aArg
= sqlite3_realloc64(p
->aArg
, N
*sizeof(p
->aArg
[0]));
347 if( aArg
==0 ) return 1;
353 /* Insert a new opcode and argument into an RE under construction. The
354 ** insertion point is just prior to existing opcode iBefore.
356 static int re_insert(ReCompiled
*p
, int iBefore
, int op
, int arg
){
358 if( p
->nAlloc
<=p
->nState
&& re_resize(p
, p
->nAlloc
*2) ) return 0;
359 for(i
=p
->nState
; i
>iBefore
; i
--){
360 p
->aOp
[i
] = p
->aOp
[i
-1];
361 p
->aArg
[i
] = p
->aArg
[i
-1];
364 p
->aOp
[iBefore
] = (char)op
;
365 p
->aArg
[iBefore
] = arg
;
369 /* Append a new opcode and argument to the end of the RE under construction.
371 static int re_append(ReCompiled
*p
, int op
, int arg
){
372 return re_insert(p
, p
->nState
, op
, arg
);
375 /* Make a copy of N opcodes starting at iStart onto the end of the RE
376 ** under construction.
378 static void re_copy(ReCompiled
*p
, int iStart
, int N
){
379 if( p
->nState
+N
>=p
->nAlloc
&& re_resize(p
, p
->nAlloc
*2+N
) ) return;
380 memcpy(&p
->aOp
[p
->nState
], &p
->aOp
[iStart
], N
*sizeof(p
->aOp
[0]));
381 memcpy(&p
->aArg
[p
->nState
], &p
->aArg
[iStart
], N
*sizeof(p
->aArg
[0]));
/* Return true if c is a hexadecimal digit character:  [0-9a-fA-F]
** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c).  If
** c is not a hex digit *pV is unchanged.
**
** Calling this once per digit, left to right, with *pV starting at 0
** accumulates the value of a multi-digit hexadecimal number.
*/
static int re_hex(int c, int *pV){
  if( c>='0' && c<='9' ){
    c -= '0';
  }else if( c>='a' && c<='f' ){
    c -= 'a' - 10;
  }else if( c>='A' && c<='F' ){
    c -= 'A' - 10;
  }else{
    return 0;                       /* Not a hex digit: *pV untouched */
  }
  *pV = (*pV)*16 + (c & 0xff);
  return 1;
}
403 /* A backslash character has been seen, read the next character and
404 ** return its interpretation.
406 static unsigned re_esc_char(ReCompiled
*p
){
407 static const char zEsc
[] = "afnrtv\\()*.+?[$^{|}]";
408 static const char zTrans
[] = "\a\f\n\r\t\v";
411 if( p
->sIn
.i
>=p
->sIn
.mx
) return 0;
412 c
= p
->sIn
.z
[p
->sIn
.i
];
413 if( c
=='u' && p
->sIn
.i
+4<p
->sIn
.mx
){
414 const unsigned char *zIn
= p
->sIn
.z
+ p
->sIn
.i
;
415 if( re_hex(zIn
[1],&v
)
424 if( c
=='x' && p
->sIn
.i
+2<p
->sIn
.mx
){
425 const unsigned char *zIn
= p
->sIn
.z
+ p
->sIn
.i
;
426 if( re_hex(zIn
[1],&v
)
433 for(i
=0; zEsc
[i
] && zEsc
[i
]!=c
; i
++){}
435 if( i
<6 ) c
= zTrans
[i
];
438 p
->zErr
= "unknown \\ escape";
443 /* Forward declaration */
444 static const char *re_subcompile_string(ReCompiled
*);
446 /* Peek at the next byte of input */
447 static unsigned char rePeek(ReCompiled
*p
){
448 return p
->sIn
.i
<p
->sIn
.mx
? p
->sIn
.z
[p
->sIn
.i
] : 0;
451 /* Compile RE text into a sequence of opcodes. Continue up to the
452 ** first unmatched ")" character, then return. If an error is found,
453 ** return a pointer to the error message string.
455 static const char *re_subcompile_re(ReCompiled
*p
){
457 int iStart
, iEnd
, iGoto
;
459 zErr
= re_subcompile_string(p
);
460 if( zErr
) return zErr
;
461 while( rePeek(p
)=='|' ){
463 re_insert(p
, iStart
, RE_OP_FORK
, iEnd
+ 2 - iStart
);
464 iGoto
= re_append(p
, RE_OP_GOTO
, 0);
466 zErr
= re_subcompile_string(p
);
467 if( zErr
) return zErr
;
468 p
->aArg
[iGoto
] = p
->nState
- iGoto
;
473 /* Compile an element of regular expression text (anything that can be
474 ** an operand to the "|" operator). Return NULL on success or a pointer
475 ** to the error message if there is a problem.
477 static const char *re_subcompile_string(ReCompiled
*p
){
482 while( (c
= p
->xNextChar(&p
->sIn
))!=0 ){
492 zErr
= re_subcompile_re(p
);
493 if( zErr
) return zErr
;
494 if( rePeek(p
)!=')' ) return "unmatched '('";
499 if( rePeek(p
)=='*' ){
500 re_append(p
, RE_OP_ANYSTAR
, 0);
503 re_append(p
, RE_OP_ANY
, 0);
508 if( iPrev
<0 ) return "'*' without operand";
509 re_insert(p
, iPrev
, RE_OP_GOTO
, p
->nState
- iPrev
+ 1);
510 re_append(p
, RE_OP_FORK
, iPrev
- p
->nState
+ 1);
514 if( iPrev
<0 ) return "'+' without operand";
515 re_append(p
, RE_OP_FORK
, iPrev
- p
->nState
);
519 if( iPrev
<0 ) return "'?' without operand";
520 re_insert(p
, iPrev
, RE_OP_FORK
, p
->nState
- iPrev
+1);
526 if( iPrev
<0 ) return "'{m,n}' without operand";
527 while( (c
=rePeek(p
))>='0' && c
<='9' ){ m
= m
*10 + c
- '0'; p
->sIn
.i
++; }
532 while( (c
=rePeek(p
))>='0' && c
<='9' ){ n
= n
*10 + c
-'0'; p
->sIn
.i
++; }
534 if( c
!='}' ) return "unmatched '{'";
535 if( n
>0 && n
<m
) return "n less than m in '{m,n}'";
537 sz
= p
->nState
- iPrev
;
539 if( n
==0 ) return "both m and n are zero in '{m,n}'";
540 re_insert(p
, iPrev
, RE_OP_FORK
, sz
+1);
543 for(j
=1; j
<m
; j
++) re_copy(p
, iPrev
, sz
);
546 re_append(p
, RE_OP_FORK
, sz
+1);
547 re_copy(p
, iPrev
, sz
);
550 re_append(p
, RE_OP_FORK
, -sz
);
555 int iFirst
= p
->nState
;
556 if( rePeek(p
)=='^' ){
557 re_append(p
, RE_OP_CC_EXC
, 0);
560 re_append(p
, RE_OP_CC_INC
, 0);
562 while( (c
= p
->xNextChar(&p
->sIn
))!=0 ){
563 if( c
=='[' && rePeek(p
)==':' ){
564 return "POSIX character classes not supported";
566 if( c
=='\\' ) c
= re_esc_char(p
);
567 if( rePeek(p
)=='-' ){
568 re_append(p
, RE_OP_CC_RANGE
, c
);
570 c
= p
->xNextChar(&p
->sIn
);
571 if( c
=='\\' ) c
= re_esc_char(p
);
572 re_append(p
, RE_OP_CC_RANGE
, c
);
574 re_append(p
, RE_OP_CC_VALUE
, c
);
576 if( rePeek(p
)==']' ){ p
->sIn
.i
++; break; }
578 if( c
==0 ) return "unclosed '['";
579 p
->aArg
[iFirst
] = p
->nState
- iFirst
;
585 case 'b': specialOp
= RE_OP_BOUNDARY
; break;
586 case 'd': specialOp
= RE_OP_DIGIT
; break;
587 case 'D': specialOp
= RE_OP_NOTDIGIT
; break;
588 case 's': specialOp
= RE_OP_SPACE
; break;
589 case 'S': specialOp
= RE_OP_NOTSPACE
; break;
590 case 'w': specialOp
= RE_OP_WORD
; break;
591 case 'W': specialOp
= RE_OP_NOTWORD
; break;
595 re_append(p
, specialOp
, 0);
598 re_append(p
, RE_OP_MATCH
, c
);
603 re_append(p
, RE_OP_MATCH
, c
);
612 /* Free and reclaim all the memory used by a previously compiled
613 ** regular expression. Applications should invoke this routine once
614 ** for every call to re_compile() to avoid memory leaks.
616 static void re_free(ReCompiled
*pRe
){
618 sqlite3_free(pRe
->aOp
);
619 sqlite3_free(pRe
->aArg
);
625 ** Compile a textual regular expression in zIn[] into a compiled regular
626 ** expression suitable for use by re_match() and return a pointer to the
627 ** compiled regular expression in *ppRe. Return NULL on success or an
628 ** error message if something goes wrong.
630 static const char *re_compile(ReCompiled
**ppRe
, const char *zIn
, int noCase
){
636 pRe
= sqlite3_malloc( sizeof(*pRe
) );
638 return "out of memory";
640 memset(pRe
, 0, sizeof(*pRe
));
641 pRe
->xNextChar
= noCase
? re_next_char_nocase
: re_next_char
;
642 if( re_resize(pRe
, 30) ){
644 return "out of memory";
649 re_append(pRe
, RE_OP_ANYSTAR
, 0);
651 pRe
->sIn
.z
= (unsigned char*)zIn
;
653 pRe
->sIn
.mx
= (int)strlen(zIn
);
654 zErr
= re_subcompile_re(pRe
);
659 if( rePeek(pRe
)=='$' && pRe
->sIn
.i
+1>=pRe
->sIn
.mx
){
660 re_append(pRe
, RE_OP_MATCH
, RE_EOF
);
661 re_append(pRe
, RE_OP_ACCEPT
, 0);
663 }else if( pRe
->sIn
.i
>=pRe
->sIn
.mx
){
664 re_append(pRe
, RE_OP_ACCEPT
, 0);
668 return "unrecognized character";
671 /* The following is a performance optimization. If the regex begins with
672 ** ".*" (if the input regex lacks an initial "^") and afterwards there are
673 ** one or more matching characters, enter those matching characters into
674 ** zInit[]. The re_match() routine can then search ahead in the input
675 ** string looking for the initial match without having to run the whole
676 ** regex engine over the string. Do not worry about trying to match
677 ** unicode characters beyond plane 0 - those are very rare and this is
678 ** just an optimization. */
679 if( pRe
->aOp
[0]==RE_OP_ANYSTAR
&& !noCase
){
680 for(j
=0, i
=1; j
<(int)sizeof(pRe
->zInit
)-2 && pRe
->aOp
[i
]==RE_OP_MATCH
; i
++){
681 unsigned x
= pRe
->aArg
[i
];
683 pRe
->zInit
[j
++] = (unsigned char)x
;
684 }else if( x
<=0xfff ){
685 pRe
->zInit
[j
++] = (unsigned char)(0xc0 | (x
>>6));
686 pRe
->zInit
[j
++] = 0x80 | (x
&0x3f);
687 }else if( x
<=0xffff ){
688 pRe
->zInit
[j
++] = (unsigned char)(0xd0 | (x
>>12));
689 pRe
->zInit
[j
++] = 0x80 | ((x
>>6)&0x3f);
690 pRe
->zInit
[j
++] = 0x80 | (x
&0x3f);
695 if( j
>0 && pRe
->zInit
[j
-1]==0 ) j
--;
702 ** Implementation of the regexp() SQL function. This function implements
703 ** the built-in REGEXP operator. The first argument to the function is the
704 ** pattern and the second argument is the string. So, the SQL statements:
708 ** is implemented as regexp(B,A).
710 static void re_sql_func(
711 sqlite3_context
*context
,
715 ReCompiled
*pRe
; /* Compiled regular expression */
716 const char *zPattern
; /* The regular expression */
717 const unsigned char *zStr
;/* String being searched */
718 const char *zErr
; /* Compile error message */
719 int setAux
= 0; /* True to invoke sqlite3_set_auxdata() */
721 (void)argc
; /* Unused */
722 pRe
= sqlite3_get_auxdata(context
, 0);
724 zPattern
= (const char*)sqlite3_value_text(argv
[0]);
725 if( zPattern
==0 ) return;
726 zErr
= re_compile(&pRe
, zPattern
, sqlite3_user_data(context
)!=0);
729 sqlite3_result_error(context
, zErr
, -1);
733 sqlite3_result_error_nomem(context
);
738 zStr
= (const unsigned char*)sqlite3_value_text(argv
[1]);
740 sqlite3_result_int(context
, re_match(pRe
, zStr
, -1));
743 sqlite3_set_auxdata(context
, 0, pRe
, (void(*)(void*))re_free
);
748 ** Invoke this routine to register the regexp() function with the
749 ** SQLite database connection.
752 __declspec(dllexport
)
754 int sqlite3_regexp_init(
757 const sqlite3_api_routines
*pApi
760 SQLITE_EXTENSION_INIT2(pApi
);
761 (void)pzErrMsg
; /* Unused */
762 rc
= sqlite3_create_function(db
, "regexp", 2, SQLITE_UTF8
|SQLITE_INNOCUOUS
,
763 0, re_sql_func
, 0, 0);
765 /* The regexpi(PATTERN,STRING) function is a case-insensitive version
766 ** of regexp(PATTERN,STRING). */
767 rc
= sqlite3_create_function(db
, "regexpi", 2, SQLITE_UTF8
|SQLITE_INNOCUOUS
,
768 (void*)db
, re_sql_func
, 0, 0);