tgupdate: merge pcreposix-compat base into pcreposix-compat
[pcreposix-compat.git] / pcreposix.h
blobef2d0f2b658c4d0a2405d9dfa12abc7e8423b897
1 /**********************************************************
2 * Perl-Compatible Regular Expressions + POSIX *
3 ***********************************************************/
5 #ifndef _PCREPOSIX_H
6 #define _PCREPOSIX_H
8 /* This is the header for the POSIX wrapper interface to the PCRE Perl-
9 Compatible Regular Expression library. It defines the things POSIX says should
10 be there. I hope.
12 Copyright (C) 1997-2012 University of Cambridge
13 Copyright (C) 2017-2020 Kyle J. McKay <mackyle@gmail.com>
14 All Rights Reserved
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
45 /* Have to include stdlib.h or stddef.h in order to ensure that size_t is
46 ** defined, but stdlib.h has historically been used and may be more
47 ** ubiquitous and therefore more compatible so it's still used here. */
49 #include <stdlib.h>
51 /* Allow for C++ users */
53 #ifdef __cplusplus
54 extern "C" {
55 #endif
57 /*
58 ** regcomp options
59 **
60 ** mostly defined by POSIX, but with some extras
61 */
63 #define REG_BASIC 0 /* BSD compatibility define (no flag bits set) */
65 #define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
66 #define REG_NEWLINE 0x0002 /* Without REG_PCRE: governs PCRE_DOTALL, PCRE_DOLLAR_ENDONLY, PCRE_MULTILINE and PCRE_NOT_EXCLUDES_NL; with REG_PCRE: alias for REG_MULTILINE */
67 #define REG_MULTILINE 0x0004 /* NOT defined by POSIX; maps to PCRE_MULTILINE */
68 #define REG_EXPANDED 0x0008 /* NOT defined by POSIX; maps to PCRE_EXTENDED */
69 #define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
70 #define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
71 #define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
72 #define REG_PEND 0x0080 /* BSD feature: pattern ends at re_endp addr */
73 #define REG_EXTENDED 0x0100 /* Maps to !PCRE_POSIX_BASIC_ESC (implied by REG_PCRE) */
74 #define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
75 #define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */
76 #define REG_DENDONLY 0x0800 /* Maps to PCRE_DOLLAR_ENDONLY */
77 #define REG_ANCHORED 0x1000 /* Maps to PCRE_ANCHORED */
78 #define REG_NOSPEC 0x2000 /* BSD feature: treat pattern as literal string */
79 #define REG_JAVASCPT 0x4000 /* Maps to PCRE_JAVASCRIPT_COMPAT + REG_PCRE */
80 #define REG_PCRE ((int)0x8000) /* Allows all PCRE extensions and implies REG_EXTENDED; cast needed with 16-bit ints */
82 /*
83 ** regexec options
84 **
85 ** mostly defined by POSIX, but with some extras
86 */
88 #define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL (same numeric value as the regcomp option REG_MULTILINE) */
89 #define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL (same numeric value as the regcomp option REG_EXPANDED) */
90 #define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo (same numeric value as the regcomp option REG_PEND) */
91 #define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY (same numeric value as the regcomp option REG_EXTENDED) */
93 /*
94 ** REG_PCRE and friends
95 */
96 /*
97 When REG_PCRE is set in the cflags (3rd) argument to the regcomp function
98 then the following behavior occurs:
100 (The notation "!REG_EXTENDED" below means the "REG_EXTENDED" flag was NOT
101 included in the cflags argument and similarly for other "!REG_..." notation.)
103 a) All PCRE regular expression extensions are allowed. Most of them are
104 allowed in plain REG_EXTENDED mode, but only the ones that would fall into
105 the "undefined behavior" area of a POSIX regular expression. Setting the
106 REG_PCRE option allows all possible PCRE regular expressions.
108 b) Setting REG_PCRE implies REG_EXTENDED but with PCRE extensions which
109 means that PCRE_POSIX_BASIC_ESC will no longer be set for !REG_EXTENDED.
110 Do not try to use Basic Regular Expressions (BREs) with REG_PCRE! Duh.
112 c) Setting REG_PCRE makes REG_NEWLINE strictly an alias for REG_MULTILINE.
113 In other words all of the extra "macro" logic to activate the various
114 "POSIX" semantics PCRE options for !REG_NEWLINE and REG_NEWLINE is
115 disabled. This means that !REG_NEWLINE will no longer force both
116 PCRE_DOTALL and PCRE_DOLLAR_ENDONLY and REG_NEWLINE will only set
117 PCRE_MULTILINE but will no longer force PCRE_NOT_EXCLUDES_NL.
119 The following table may be helpful when using REG_PCRE:
121 PCRE & Perl Option to use for regcomp POSIX REG_PCRE/!REG_PCRE Defaults
122 ----------- ------------------------- ------ -----------------------------
123 (?i) or /i REG_ICASE Yes Off/Off
124 (?s) or /s REG_DOTALL No Off/On unless REG_NEWLINE set
125 (?m) or /m REG_MULTILINE/REG_NEWLINE No/Yes Off/Off (REG_NEWLINE => more)
126 (?x) or /x REG_EXPANDED No Off/Off
128 In !REG_PCRE (aka POSIX) mode, !REG_NEWLINE sets both PCRE_DOTALL and
129 PCRE_DOLLAR_ENDONLY whereas REG_NEWLINE sets both PCRE_MULTILINE and
130 PCRE_NOT_EXCLUDES_NL. In REG_PCRE (aka PCRE) mode, REG_NEWLINE is strictly
131 an alias for REG_MULTILINE (all magic macro logic is disabled).
133 Setting REG_JAVASCPT implies REG_PCRE. All the comments above about REG_PCRE
134 also apply to REG_JAVASCPT but REG_JAVASCPT also sets PCRE_JAVASCRIPT_COMPAT. */
136 /* Error values. Not all these are relevant or used by the wrapper. */
138 #define REG_ASSERT 1 /* internal error ? */
139 #define REG_BADBR 2 /* invalid repeat counts in {} */
140 #define REG_BADPAT 3 /* pattern error */
141 #define REG_BADRPT 4 /* invalid use of ?, * or + */
142 #define REG_EBRACE 5 /* unbalanced {} */
143 #define REG_EBRACK 6 /* unbalanced [] */
144 #define REG_ECOLLATE 7 /* collation error - not relevant to this wrapper */
145 #define REG_ECTYPE 8 /* bad character class */
146 #define REG_EESCAPE 9 /* bad escape sequence */
147 #define REG_EMPTY 10 /* empty expression */
148 #define REG_EPAREN 11 /* unbalanced () */
149 #define REG_ERANGE 12 /* bad range inside [] */
150 #define REG_ESIZE 13 /* expression too big */
151 #define REG_ESPACE 14 /* failed to get memory */
152 #define REG_ESUBREG 15 /* bad back reference */
153 #define REG_INVARG 16 /* bad argument */
154 #define REG_NOMATCH 17 /* match failed */
156 /* The structure representing a compiled regular expression. */
158 typedef struct {
159 void *re_impl; /* implementation-private pointer; keep out */
160 size_t re_nsub; /* POSIX: number of parenthesized subexpressions in the pattern */
161 size_t re_erroffset; /* presumably the offset in the pattern where compilation failed -- confirm in pcreposix.c */
162 const char *re_endp; /* REG_PEND only: address at which the pattern ends */
163 } regex_t;
165 /* The structure in which a captured offset is returned. */
167 /* POSIX requires this be a signed integer type that can hold the largest value
168 of either an ssize_t or a ptrdiff_t type. A long will do nicely on all but LLP64
169 architectures where a long long is needed instead. */
171 #if defined(__LLP64__) || defined(_WIN64)
172 typedef long long regoff_t; /* LLP64 case */
173 #else
174 typedef long regoff_t; /* every other architecture */
175 #endif /* __LLP64__ || _WIN64 */
177 typedef struct { /* one captured range, expressed as a pair of offsets */
178 regoff_t rm_so; /* POSIX: offset of the start of the matched substring */
179 regoff_t rm_eo; /* POSIX: offset of the first character after the matched substring */
180 } regmatch_t;
182 /* When an application links to a PCRE DLL in Windows, the symbols that are
183 imported have to be identified as such. When building PCRE, the appropriate
184 export settings are needed, and are set in pcreposix.c before including this
185 file. Defining PCRE_STATIC, or defining PCREPOSIX_EXP_DECL beforehand, skips the dllimport definitions below. */
187 #if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
188 # define PCREPOSIX_EXP_DECL extern __declspec(dllimport) /* prefix applied to the prototypes in this header */
189 # define PCREPOSIX_EXP_DEFN __declspec(dllimport) /* not used in this header; presumably for the definitions in pcreposix.c -- confirm */
190 #endif /* _WIN32 && !PCRE_STATIC && !PCREPOSIX_EXP_DECL */
192 /* By default, we use the standard "extern" declarations (extern "C" when compiled as C++). */
194 #ifndef PCREPOSIX_EXP_DECL
195 # ifdef __cplusplus
196 # define PCREPOSIX_EXP_DECL extern "C"
197 # define PCREPOSIX_EXP_DEFN extern "C"
198 # else
199 # define PCREPOSIX_EXP_DECL extern
200 # define PCREPOSIX_EXP_DEFN extern
201 # endif /* __cplusplus */
202 #endif /* !PCREPOSIX_EXP_DECL */
204 /* The functions: the POSIX regex entry points supplied by this wrapper. */
206 PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int); /* third argument is cflags (the regcomp options) */
207 PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
208 regmatch_t *, int); /* presumably the final int carries the regexec options -- confirm */
209 PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t); /* POSIX: writes the message for an error code into the caller's buffer */
210 PCREPOSIX_EXP_DECL void regfree(regex_t *); /* POSIX: releases the memory held by a compiled regex_t */
212 #ifdef __cplusplus
213 } /* extern "C" */
214 #endif
216 #endif /* End of pcreposix.h */