2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
32 * Portions of this engine have been developed in cooperation with
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
34 * other compatibility work.
/* Version and copyright banner for the engine. */
static char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
42 #define PY_SSIZE_T_CLEAN
45 #include "structmember.h" /* offsetof */
51 /* name of this module, minus the leading underscore */
52 #if !defined(SRE_MODULE)
53 #define SRE_MODULE "sre"
56 #define SRE_PY_MODULE "re"
58 /* defining this one enables tracing */
61 #if PY_VERSION_HEX >= 0x01060000
62 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
63 /* defining this enables unicode support (default under 1.6a1 and later) */
68 /* -------------------------------------------------------------------- */
69 /* optional features */
71 /* enables fast searching */
72 #define USE_FAST_SEARCH
74 /* enables aggressive inlining (always on for Visual C) */
77 /* enables copy/deepcopy handling (work in progress) */
78 #undef USE_BUILTIN_COPY
80 #if PY_VERSION_HEX < 0x01060000
81 #define PyObject_DEL(op) PyMem_DEL((op))
84 /* -------------------------------------------------------------------- */
87 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
89 /* fastest possible local call under MSVC */
90 #define LOCAL(type) static __inline type __fastcall
91 #elif defined(USE_INLINE)
92 #define LOCAL(type) static inline type
94 #define LOCAL(type) static type
98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
99 #define SRE_ERROR_STATE -2 /* illegal state */
100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
101 #define SRE_ERROR_MEMORY -9 /* out of memory */
102 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
105 #define TRACE(v) printf v
110 /* -------------------------------------------------------------------- */
111 /* search engine state */
/* default character predicates (run sre_chars.py to regenerate tables) */

/* Bit flags stored per character in sre_char_info. */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Character class flags for code points 0..127.  Each entry is an OR of
   the SRE_*_MASK bits above.  The table is read-only, hence const. */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* Lower-case mapping for code points 0..127: 'A'..'Z' map to 'a'..'z',
   every other entry maps to itself.  Read-only, hence const. */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* Each predicate yields the matching mask bit (non-zero) when ch is below
   128 and has that class, and 0 otherwise.  ch is evaluated twice. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
152 static unsigned int sre_lower(unsigned int ch
)
154 return ((ch
) < 128 ? (unsigned int)sre_char_lower
[ch
] : ch
);
/* locale-specific character predicates */

/* The range test is written as !(c & ~255) rather than c < 256: the two
 * are equal for any unsigned c, and the mask form does not trigger
 * "comparison is always true" warnings when c's type cannot hold 256. */
#define SRE_LOC_IS_DIGIT(ch) \
    (!((ch) & ~255) ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) \
    (!((ch) & ~255) ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) \
    ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) \
    (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) \
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case ch with tolower() when it is below 256; larger values are
   returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int)tolower(ch);
    return ch;
}
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* Each predicate casts ch to Py_UNICODE and defers to the matching
   Py_UNICODE_* macro. */
#define SRE_UNI_IS_DIGIT(ch) \
    Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) \
    Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) \
    Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) \
    Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) \
    (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case ch via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    Py_UNICODE uch = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(uch);
}

#endif /* HAVE_UNICODE */
189 sre_category(SRE_CODE category
, unsigned int ch
)
193 case SRE_CATEGORY_DIGIT
:
194 return SRE_IS_DIGIT(ch
);
195 case SRE_CATEGORY_NOT_DIGIT
:
196 return !SRE_IS_DIGIT(ch
);
197 case SRE_CATEGORY_SPACE
:
198 return SRE_IS_SPACE(ch
);
199 case SRE_CATEGORY_NOT_SPACE
:
200 return !SRE_IS_SPACE(ch
);
201 case SRE_CATEGORY_WORD
:
202 return SRE_IS_WORD(ch
);
203 case SRE_CATEGORY_NOT_WORD
:
204 return !SRE_IS_WORD(ch
);
205 case SRE_CATEGORY_LINEBREAK
:
206 return SRE_IS_LINEBREAK(ch
);
207 case SRE_CATEGORY_NOT_LINEBREAK
:
208 return !SRE_IS_LINEBREAK(ch
);
210 case SRE_CATEGORY_LOC_WORD
:
211 return SRE_LOC_IS_WORD(ch
);
212 case SRE_CATEGORY_LOC_NOT_WORD
:
213 return !SRE_LOC_IS_WORD(ch
);
215 #if defined(HAVE_UNICODE)
216 case SRE_CATEGORY_UNI_DIGIT
:
217 return SRE_UNI_IS_DIGIT(ch
);
218 case SRE_CATEGORY_UNI_NOT_DIGIT
:
219 return !SRE_UNI_IS_DIGIT(ch
);
220 case SRE_CATEGORY_UNI_SPACE
:
221 return SRE_UNI_IS_SPACE(ch
);
222 case SRE_CATEGORY_UNI_NOT_SPACE
:
223 return !SRE_UNI_IS_SPACE(ch
);
224 case SRE_CATEGORY_UNI_WORD
:
225 return SRE_UNI_IS_WORD(ch
);
226 case SRE_CATEGORY_UNI_NOT_WORD
:
227 return !SRE_UNI_IS_WORD(ch
);
228 case SRE_CATEGORY_UNI_LINEBREAK
:
229 return SRE_UNI_IS_LINEBREAK(ch
);
230 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
231 return !SRE_UNI_IS_LINEBREAK(ch
);
233 case SRE_CATEGORY_UNI_DIGIT
:
234 return SRE_IS_DIGIT(ch
);
235 case SRE_CATEGORY_UNI_NOT_DIGIT
:
236 return !SRE_IS_DIGIT(ch
);
237 case SRE_CATEGORY_UNI_SPACE
:
238 return SRE_IS_SPACE(ch
);
239 case SRE_CATEGORY_UNI_NOT_SPACE
:
240 return !SRE_IS_SPACE(ch
);
241 case SRE_CATEGORY_UNI_WORD
:
242 return SRE_LOC_IS_WORD(ch
);
243 case SRE_CATEGORY_UNI_NOT_WORD
:
244 return !SRE_LOC_IS_WORD(ch
);
245 case SRE_CATEGORY_UNI_LINEBREAK
:
246 return SRE_IS_LINEBREAK(ch
);
247 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
248 return !SRE_IS_LINEBREAK(ch
);
257 data_stack_dealloc(SRE_STATE
* state
)
259 if (state
->data_stack
) {
260 PyMem_FREE(state
->data_stack
);
261 state
->data_stack
= NULL
;
263 state
->data_stack_size
= state
->data_stack_base
= 0;
267 data_stack_grow(SRE_STATE
* state
, Py_ssize_t size
)
269 Py_ssize_t minsize
, cursize
;
270 minsize
= state
->data_stack_base
+size
;
271 cursize
= state
->data_stack_size
;
272 if (cursize
< minsize
) {
274 cursize
= minsize
+minsize
/4+1024;
275 TRACE(("allocate/grow stack %d\n", cursize
));
276 stack
= PyMem_REALLOC(state
->data_stack
, cursize
);
278 data_stack_dealloc(state
);
279 return SRE_ERROR_MEMORY
;
281 state
->data_stack
= (char *)stack
;
282 state
->data_stack_size
= cursize
;
287 /* generate 8-bit version */
289 #define SRE_CHAR unsigned char
290 #define SRE_AT sre_at
291 #define SRE_COUNT sre_count
292 #define SRE_CHARSET sre_charset
293 #define SRE_INFO sre_info
294 #define SRE_MATCH sre_match
295 #define SRE_MATCH_CONTEXT sre_match_context
296 #define SRE_SEARCH sre_search
297 #define SRE_LITERAL_TEMPLATE sre_literal_template
299 #if defined(HAVE_UNICODE)
301 #define SRE_RECURSIVE
305 #undef SRE_LITERAL_TEMPLATE
308 #undef SRE_MATCH_CONTEXT
315 /* generate 16-bit unicode version */
317 #define SRE_CHAR Py_UNICODE
318 #define SRE_AT sre_uat
319 #define SRE_COUNT sre_ucount
320 #define SRE_CHARSET sre_ucharset
321 #define SRE_INFO sre_uinfo
322 #define SRE_MATCH sre_umatch
323 #define SRE_MATCH_CONTEXT sre_umatch_context
324 #define SRE_SEARCH sre_usearch
325 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
328 #endif /* SRE_RECURSIVE */
330 /* -------------------------------------------------------------------- */
331 /* String matching engine */
333 /* the following section is compiled twice, with different character
337 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
339 /* check if pointer is at given position */
341 Py_ssize_t thisp
, thatp
;
345 case SRE_AT_BEGINNING
:
346 case SRE_AT_BEGINNING_STRING
:
347 return ((void*) ptr
== state
->beginning
);
349 case SRE_AT_BEGINNING_LINE
:
350 return ((void*) ptr
== state
->beginning
||
351 SRE_IS_LINEBREAK((int) ptr
[-1]));
354 return (((void*) (ptr
+1) == state
->end
&&
355 SRE_IS_LINEBREAK((int) ptr
[0])) ||
356 ((void*) ptr
== state
->end
));
358 case SRE_AT_END_LINE
:
359 return ((void*) ptr
== state
->end
||
360 SRE_IS_LINEBREAK((int) ptr
[0]));
362 case SRE_AT_END_STRING
:
363 return ((void*) ptr
== state
->end
);
365 case SRE_AT_BOUNDARY
:
366 if (state
->beginning
== state
->end
)
368 thatp
= ((void*) ptr
> state
->beginning
) ?
369 SRE_IS_WORD((int) ptr
[-1]) : 0;
370 thisp
= ((void*) ptr
< state
->end
) ?
371 SRE_IS_WORD((int) ptr
[0]) : 0;
372 return thisp
!= thatp
;
374 case SRE_AT_NON_BOUNDARY
:
375 if (state
->beginning
== state
->end
)
377 thatp
= ((void*) ptr
> state
->beginning
) ?
378 SRE_IS_WORD((int) ptr
[-1]) : 0;
379 thisp
= ((void*) ptr
< state
->end
) ?
380 SRE_IS_WORD((int) ptr
[0]) : 0;
381 return thisp
== thatp
;
383 case SRE_AT_LOC_BOUNDARY
:
384 if (state
->beginning
== state
->end
)
386 thatp
= ((void*) ptr
> state
->beginning
) ?
387 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
388 thisp
= ((void*) ptr
< state
->end
) ?
389 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
390 return thisp
!= thatp
;
392 case SRE_AT_LOC_NON_BOUNDARY
:
393 if (state
->beginning
== state
->end
)
395 thatp
= ((void*) ptr
> state
->beginning
) ?
396 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
397 thisp
= ((void*) ptr
< state
->end
) ?
398 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
399 return thisp
== thatp
;
401 #if defined(HAVE_UNICODE)
402 case SRE_AT_UNI_BOUNDARY
:
403 if (state
->beginning
== state
->end
)
405 thatp
= ((void*) ptr
> state
->beginning
) ?
406 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
407 thisp
= ((void*) ptr
< state
->end
) ?
408 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
409 return thisp
!= thatp
;
411 case SRE_AT_UNI_NON_BOUNDARY
:
412 if (state
->beginning
== state
->end
)
414 thatp
= ((void*) ptr
> state
->beginning
) ?
415 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
416 thisp
= ((void*) ptr
< state
->end
) ?
417 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
418 return thisp
== thatp
;
427 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
429 /* check if character is a member of the given set */
440 /* <LITERAL> <code> */
446 case SRE_OP_CATEGORY
:
447 /* <CATEGORY> <code> */
448 if (sre_category(set
[0], (int) ch
))
454 if (sizeof(SRE_CODE
) == 2) {
455 /* <CHARSET> <bitmap> (16 bits per code word) */
456 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
461 /* <CHARSET> <bitmap> (32 bits per code word) */
462 if (ch
< 256 && (set
[ch
>> 5] & (1 << (ch
& 31))))
469 /* <RANGE> <lower> <upper> */
470 if (set
[0] <= ch
&& ch
<= set
[1])
479 case SRE_OP_BIGCHARSET
:
480 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
482 Py_ssize_t count
, block
;
485 if (sizeof(SRE_CODE
) == 2) {
486 block
= ((unsigned char*)set
)[ch
>> 8];
488 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
493 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
494 * warnings when c's type supports only numbers < N+1 */
496 block
= ((unsigned char*)set
)[ch
>> 8];
501 (set
[block
*8 + ((ch
& 255)>>5)] & (1 << (ch
& 31))))
509 /* internal error -- there's not much we can do about it
510 here, so let's just pretend it didn't match... */
516 LOCAL(Py_ssize_t
) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
);
519 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, Py_ssize_t maxcount
)
522 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->ptr
;
523 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
527 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
528 end
= ptr
+ maxcount
;
530 switch (pattern
[0]) {
534 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
535 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
540 /* repeated dot wildcard. */
541 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
542 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
547 /* repeated dot wildcard. skip to the end of the target
548 string, and backtrack from there */
549 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
554 /* repeated literal */
556 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
557 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
561 case SRE_OP_LITERAL_IGNORE
:
562 /* repeated literal */
564 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
565 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
569 case SRE_OP_NOT_LITERAL
:
570 /* repeated non-literal */
572 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
573 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
577 case SRE_OP_NOT_LITERAL_IGNORE
:
578 /* repeated non-literal */
580 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
581 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
586 /* repeated single character pattern */
587 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
588 while ((SRE_CHAR
*) state
->ptr
< end
) {
589 i
= SRE_MATCH(state
, pattern
);
595 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
596 (SRE_CHAR
*) state
->ptr
- ptr
));
597 return (SRE_CHAR
*) state
->ptr
- ptr
;
600 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
601 return ptr
- (SRE_CHAR
*) state
->ptr
;
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* ptr = state->ptr;
    SRE_CHAR* end = state->end;
    Py_ssize_t k;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* without a known prefix of more than one character there is
       nothing more to verify */
    if (!((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1))
        return pattern[0];

    /* check known prefix */
    /* <length> <skip> <prefix data> <overlap data> */
    for (k = 0; k < pattern[5]; k++) {
        if ((SRE_CODE) ptr[k] != pattern[7 + k])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
632 /* The macros below should be used to protect recursive SRE_MATCH()
633 * calls that *failed* and do *not* return immediately (IOW, those
634 * that will backtrack). Explaining:
636 * - Recursive SRE_MATCH() returned true: that's usually a success
637 * (besides atypical cases like ASSERT_NOT), therefore there's no
638 * reason to restore lastmark;
640 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
641 * is returning to the caller: If the current SRE_MATCH() is the
642 * top function of the recursion, returning false will be a matching
643 * failure, and it doesn't matter where lastmark is pointing to.
644 * If it's *not* the top function, it will be a recursive SRE_MATCH()
645 * failure by itself, and the calling SRE_MATCH() will have to deal
646 * with the failure by the same rules explained here (it will restore
647 * lastmark by itself if necessary);
649 * - Recursive SRE_MATCH() returned false, and will continue the
650 * outside 'for' loop: must be protected when breaking, since the next
651 * OP could potentially depend on lastmark;
653 * - Recursive SRE_MATCH() returned false, and will be called again
654 * inside a local for/while loop: must be protected between each
655 * loop iteration, since the recursive SRE_MATCH() could do anything,
656 * and could potentially depend on lastmark.
658 * For more information, check the discussion at SF patch #712900.
660 #define LASTMARK_SAVE() \
662 ctx->lastmark = state->lastmark; \
663 ctx->lastindex = state->lastindex; \
665 #define LASTMARK_RESTORE() \
667 state->lastmark = ctx->lastmark; \
668 state->lastindex = ctx->lastindex; \
671 #define RETURN_ERROR(i) do { return i; } while(0)
672 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
673 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
675 #define RETURN_ON_ERROR(i) \
676 do { if (i < 0) RETURN_ERROR(i); } while (0)
677 #define RETURN_ON_SUCCESS(i) \
678 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
679 #define RETURN_ON_FAILURE(i) \
680 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
684 #define DATA_STACK_ALLOC(state, type, ptr) \
686 alloc_pos = state->data_stack_base; \
687 TRACE(("allocating %s in %d (%d)\n", \
688 SFY(type), alloc_pos, sizeof(type))); \
689 if (state->data_stack_size < alloc_pos+sizeof(type)) { \
690 int j = data_stack_grow(state, sizeof(type)); \
691 if (j < 0) return j; \
693 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
695 ptr = (type*)(state->data_stack+alloc_pos); \
696 state->data_stack_base += sizeof(type); \
699 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
701 TRACE(("looking up %s at %d\n", SFY(type), pos)); \
702 ptr = (type*)(state->data_stack+pos); \
705 #define DATA_STACK_PUSH(state, data, size) \
707 TRACE(("copy data in %p to %d (%d)\n", \
708 data, state->data_stack_base, size)); \
709 if (state->data_stack_size < state->data_stack_base+size) { \
710 int j = data_stack_grow(state, size); \
711 if (j < 0) return j; \
713 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
715 memcpy(state->data_stack+state->data_stack_base, data, size); \
716 state->data_stack_base += size; \
719 #define DATA_STACK_POP(state, data, size, discard) \
721 TRACE(("copy data to %p from %d (%d)\n", \
722 data, state->data_stack_base-size, size)); \
723 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
725 state->data_stack_base -= size; \
728 #define DATA_STACK_POP_DISCARD(state, size) \
730 TRACE(("discard data from %d (%d)\n", \
731 state->data_stack_base-size, size)); \
732 state->data_stack_base -= size; \
735 #define DATA_PUSH(x) \
736 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
737 #define DATA_POP(x) \
738 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
739 #define DATA_POP_DISCARD(x) \
740 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
741 #define DATA_ALLOC(t,p) \
742 DATA_STACK_ALLOC(state, t, p)
743 #define DATA_LOOKUP_AT(t,p,pos) \
744 DATA_STACK_LOOKUP_AT(state,t,p,pos)
746 #define MARK_PUSH(lastmark) \
747 do if (lastmark > 0) { \
748 i = lastmark; /* ctx->lastmark may change if reallocated */ \
749 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
751 #define MARK_POP(lastmark) \
752 do if (lastmark > 0) { \
753 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
755 #define MARK_POP_KEEP(lastmark) \
756 do if (lastmark > 0) { \
757 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
759 #define MARK_POP_DISCARD(lastmark) \
760 do if (lastmark > 0) { \
761 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
765 #define JUMP_MAX_UNTIL_1 1
766 #define JUMP_MAX_UNTIL_2 2
767 #define JUMP_MAX_UNTIL_3 3
768 #define JUMP_MIN_UNTIL_1 4
769 #define JUMP_MIN_UNTIL_2 5
770 #define JUMP_MIN_UNTIL_3 6
771 #define JUMP_REPEAT 7
772 #define JUMP_REPEAT_ONE_1 8
773 #define JUMP_REPEAT_ONE_2 9
774 #define JUMP_MIN_REPEAT_ONE 10
775 #define JUMP_BRANCH 11
776 #define JUMP_ASSERT 12
777 #define JUMP_ASSERT_NOT 13
779 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
780 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
781 nextctx->last_ctx_pos = ctx_pos; \
782 nextctx->jump = jumpvalue; \
783 nextctx->pattern = nextpattern; \
784 ctx_pos = alloc_pos; \
788 while (0) /* gcc doesn't like labels at end of scopes */ \
791 Py_ssize_t last_ctx_pos
;
797 Py_ssize_t lastindex
;
804 /* check if string matches the given pattern. returns <0 for
805 error, 0 for failure, and 1 for success */
807 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
809 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
810 Py_ssize_t alloc_pos
, ctx_pos
= -1;
811 Py_ssize_t i
, ret
= 0;
813 unsigned int sigcount
=0;
815 SRE_MATCH_CONTEXT
* ctx
;
816 SRE_MATCH_CONTEXT
* nextctx
;
818 TRACE(("|%p|%p|ENTER\n", pattern
, state
->ptr
));
820 DATA_ALLOC(SRE_MATCH_CONTEXT
, ctx
);
821 ctx
->last_ctx_pos
= -1;
822 ctx
->jump
= JUMP_NONE
;
823 ctx
->pattern
= pattern
;
828 ctx
->ptr
= (SRE_CHAR
*)state
->ptr
;
830 if (ctx
->pattern
[0] == SRE_OP_INFO
) {
831 /* optimization info block */
832 /* <INFO> <1=skip> <2=flags> <3=min> ... */
833 if (ctx
->pattern
[3] && (end
- ctx
->ptr
) < ctx
->pattern
[3]) {
834 TRACE(("reject (got %d chars, need %d)\n",
835 (end
- ctx
->ptr
), ctx
->pattern
[3]));
838 ctx
->pattern
+= ctx
->pattern
[1] + 1;
843 if ((0 == (sigcount
& 0xfff)) && PyErr_CheckSignals())
844 RETURN_ERROR(SRE_ERROR_INTERRUPTED
);
846 switch (*ctx
->pattern
++) {
851 TRACE(("|%p|%p|MARK %d\n", ctx
->pattern
,
852 ctx
->ptr
, ctx
->pattern
[0]));
855 state
->lastindex
= i
/2 + 1;
856 if (i
> state
->lastmark
) {
857 /* state->lastmark is the highest valid index in the
858 state->mark array. If it is increased by more than 1,
859 the intervening marks must be set to NULL to signal
860 that these marks have not been encountered. */
861 Py_ssize_t j
= state
->lastmark
+ 1;
863 state
->mark
[j
++] = NULL
;
866 state
->mark
[i
] = ctx
->ptr
;
871 /* match literal string */
872 /* <LITERAL> <code> */
873 TRACE(("|%p|%p|LITERAL %d\n", ctx
->pattern
,
874 ctx
->ptr
, *ctx
->pattern
));
875 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] != ctx
->pattern
[0])
881 case SRE_OP_NOT_LITERAL
:
882 /* match anything that is not literal character */
883 /* <NOT_LITERAL> <code> */
884 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx
->pattern
,
885 ctx
->ptr
, *ctx
->pattern
));
886 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] == ctx
->pattern
[0])
894 TRACE(("|%p|%p|SUCCESS\n", ctx
->pattern
, ctx
->ptr
));
895 state
->ptr
= ctx
->ptr
;
899 /* match at given position */
901 TRACE(("|%p|%p|AT %d\n", ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
902 if (!SRE_AT(state
, ctx
->ptr
, *ctx
->pattern
))
907 case SRE_OP_CATEGORY
:
908 /* match at given category */
909 /* <CATEGORY> <code> */
910 TRACE(("|%p|%p|CATEGORY %d\n", ctx
->pattern
,
911 ctx
->ptr
, *ctx
->pattern
));
912 if (ctx
->ptr
>= end
|| !sre_category(ctx
->pattern
[0], ctx
->ptr
[0]))
919 /* match anything (except a newline) */
921 TRACE(("|%p|%p|ANY\n", ctx
->pattern
, ctx
->ptr
));
922 if (ctx
->ptr
>= end
|| SRE_IS_LINEBREAK(ctx
->ptr
[0]))
930 TRACE(("|%p|%p|ANY_ALL\n", ctx
->pattern
, ctx
->ptr
));
937 /* match set member (or non_member) */
938 /* <IN> <skip> <set> */
939 TRACE(("|%p|%p|IN\n", ctx
->pattern
, ctx
->ptr
));
940 if (ctx
->ptr
>= end
|| !SRE_CHARSET(ctx
->pattern
+ 1, *ctx
->ptr
))
942 ctx
->pattern
+= ctx
->pattern
[0];
946 case SRE_OP_LITERAL_IGNORE
:
947 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
948 ctx
->pattern
, ctx
->ptr
, ctx
->pattern
[0]));
949 if (ctx
->ptr
>= end
||
950 state
->lower(*ctx
->ptr
) != state
->lower(*ctx
->pattern
))
956 case SRE_OP_NOT_LITERAL_IGNORE
:
957 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
958 ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
959 if (ctx
->ptr
>= end
||
960 state
->lower(*ctx
->ptr
) == state
->lower(*ctx
->pattern
))
966 case SRE_OP_IN_IGNORE
:
967 TRACE(("|%p|%p|IN_IGNORE\n", ctx
->pattern
, ctx
->ptr
));
969 || !SRE_CHARSET(ctx
->pattern
+1,
970 (SRE_CODE
)state
->lower(*ctx
->ptr
)))
972 ctx
->pattern
+= ctx
->pattern
[0];
979 /* <JUMP> <offset> */
980 TRACE(("|%p|%p|JUMP %d\n", ctx
->pattern
,
981 ctx
->ptr
, ctx
->pattern
[0]));
982 ctx
->pattern
+= ctx
->pattern
[0];
987 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
988 TRACE(("|%p|%p|BRANCH\n", ctx
->pattern
, ctx
->ptr
));
990 ctx
->u
.rep
= state
->repeat
;
992 MARK_PUSH(ctx
->lastmark
);
993 for (; ctx
->pattern
[0]; ctx
->pattern
+= ctx
->pattern
[0]) {
994 if (ctx
->pattern
[1] == SRE_OP_LITERAL
&&
996 (SRE_CODE
) *ctx
->ptr
!= ctx
->pattern
[2]))
998 if (ctx
->pattern
[1] == SRE_OP_IN
&&
1000 !SRE_CHARSET(ctx
->pattern
+ 3, (SRE_CODE
) *ctx
->ptr
)))
1002 state
->ptr
= ctx
->ptr
;
1003 DO_JUMP(JUMP_BRANCH
, jump_branch
, ctx
->pattern
+1);
1006 MARK_POP_DISCARD(ctx
->lastmark
);
1007 RETURN_ON_ERROR(ret
);
1011 MARK_POP_KEEP(ctx
->lastmark
);
1015 MARK_POP_DISCARD(ctx
->lastmark
);
1018 case SRE_OP_REPEAT_ONE
:
1019 /* match repeated sequence (maximizing regexp) */
1021 /* this operator only works if the repeated item is
1022 exactly one character wide, and we're not already
1023 collecting backtracking points. for other cases,
1024 use the MAX_REPEAT operator */
1026 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1028 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1029 ctx
->pattern
[1], ctx
->pattern
[2]));
1031 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1032 RETURN_FAILURE
; /* cannot match */
1034 state
->ptr
= ctx
->ptr
;
1036 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[2]);
1037 RETURN_ON_ERROR(ret
);
1038 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1040 ctx
->ptr
+= ctx
->count
;
1042 /* when we arrive here, count contains the number of
1043 matches, and ctx->ptr points to the tail of the target
1044 string. check if the rest of the pattern matches,
1045 and backtrack if not. */
1047 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1050 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1051 /* tail is empty. we're finished */
1052 state
->ptr
= ctx
->ptr
;
1058 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_LITERAL
) {
1059 /* tail starts with a literal. skip positions where
1060 the rest of the pattern cannot possibly match */
1061 ctx
->u
.chr
= ctx
->pattern
[ctx
->pattern
[0]+1];
1063 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1] &&
1064 (ctx
->ptr
>= end
|| *ctx
->ptr
!= ctx
->u
.chr
)) {
1068 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1070 state
->ptr
= ctx
->ptr
;
1071 DO_JUMP(JUMP_REPEAT_ONE_1
, jump_repeat_one_1
,
1072 ctx
->pattern
+ctx
->pattern
[0]);
1074 RETURN_ON_ERROR(ret
);
1086 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1]) {
1087 state
->ptr
= ctx
->ptr
;
1088 DO_JUMP(JUMP_REPEAT_ONE_2
, jump_repeat_one_2
,
1089 ctx
->pattern
+ctx
->pattern
[0]);
1091 RETURN_ON_ERROR(ret
);
1101 case SRE_OP_MIN_REPEAT_ONE
:
1102 /* match repeated sequence (minimizing regexp) */
1104 /* this operator only works if the repeated item is
1105 exactly one character wide, and we're not already
1106 collecting backtracking points. for other cases,
1107 use the MIN_REPEAT operator */
1109 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1111 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1112 ctx
->pattern
[1], ctx
->pattern
[2]));
1114 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1115 RETURN_FAILURE
; /* cannot match */
1117 state
->ptr
= ctx
->ptr
;
1119 if (ctx
->pattern
[1] == 0)
1122 /* count using pattern min as the maximum */
1123 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[1]);
1124 RETURN_ON_ERROR(ret
);
1125 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1126 if (ret
< (Py_ssize_t
) ctx
->pattern
[1])
1127 /* didn't match minimum number of times */
1129 /* advance past minimum matches of repeat */
1131 ctx
->ptr
+= ctx
->count
;
1134 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1135 /* tail is empty. we're finished */
1136 state
->ptr
= ctx
->ptr
;
1142 while ((Py_ssize_t
)ctx
->pattern
[2] == 65535
1143 || ctx
->count
<= (Py_ssize_t
)ctx
->pattern
[2]) {
1144 state
->ptr
= ctx
->ptr
;
1145 DO_JUMP(JUMP_MIN_REPEAT_ONE
,jump_min_repeat_one
,
1146 ctx
->pattern
+ctx
->pattern
[0]);
1148 RETURN_ON_ERROR(ret
);
1151 state
->ptr
= ctx
->ptr
;
1152 ret
= SRE_COUNT(state
, ctx
->pattern
+3, 1);
1153 RETURN_ON_ERROR(ret
);
1154 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1166 /* create repeat context. all the hard work is done
1167 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1168 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1169 TRACE(("|%p|%p|REPEAT %d %d\n", ctx
->pattern
, ctx
->ptr
,
1170 ctx
->pattern
[1], ctx
->pattern
[2]));
1172 /* install new repeat context */
1173 ctx
->u
.rep
= (SRE_REPEAT
*) PyObject_MALLOC(sizeof(*ctx
->u
.rep
));
1178 ctx
->u
.rep
->count
= -1;
1179 ctx
->u
.rep
->pattern
= ctx
->pattern
;
1180 ctx
->u
.rep
->prev
= state
->repeat
;
1181 ctx
->u
.rep
->last_ptr
= NULL
;
1182 state
->repeat
= ctx
->u
.rep
;
1184 state
->ptr
= ctx
->ptr
;
1185 DO_JUMP(JUMP_REPEAT
, jump_repeat
, ctx
->pattern
+ctx
->pattern
[0]);
1186 state
->repeat
= ctx
->u
.rep
->prev
;
1187 PyObject_FREE(ctx
->u
.rep
);
1190 RETURN_ON_ERROR(ret
);
1195 case SRE_OP_MAX_UNTIL
:
1196 /* maximizing repeat */
1197 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1199 /* FIXME: we probably need to deal with zero-width
1200 matches in here... */
1202 ctx
->u
.rep
= state
->repeat
;
1204 RETURN_ERROR(SRE_ERROR_STATE
);
1206 state
->ptr
= ctx
->ptr
;
1208 ctx
->count
= ctx
->u
.rep
->count
+1;
1210 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx
->pattern
,
1211 ctx
->ptr
, ctx
->count
));
1213 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1214 /* not enough matches */
1215 ctx
->u
.rep
->count
= ctx
->count
;
1216 DO_JUMP(JUMP_MAX_UNTIL_1
, jump_max_until_1
,
1217 ctx
->u
.rep
->pattern
+3);
1219 RETURN_ON_ERROR(ret
);
1222 ctx
->u
.rep
->count
= ctx
->count
-1;
1223 state
->ptr
= ctx
->ptr
;
1227 if ((ctx
->count
< ctx
->u
.rep
->pattern
[2] ||
1228 ctx
->u
.rep
->pattern
[2] == 65535) &&
1229 state
->ptr
!= ctx
->u
.rep
->last_ptr
) {
1230 /* we may have enough matches, but if we can
1231 match another item, do so */
1232 ctx
->u
.rep
->count
= ctx
->count
;
1234 MARK_PUSH(ctx
->lastmark
);
1235 /* zero-width match protection */
1236 DATA_PUSH(&ctx
->u
.rep
->last_ptr
);
1237 ctx
->u
.rep
->last_ptr
= state
->ptr
;
1238 DO_JUMP(JUMP_MAX_UNTIL_2
, jump_max_until_2
,
1239 ctx
->u
.rep
->pattern
+3);
1240 DATA_POP(&ctx
->u
.rep
->last_ptr
);
1242 MARK_POP_DISCARD(ctx
->lastmark
);
1243 RETURN_ON_ERROR(ret
);
1246 MARK_POP(ctx
->lastmark
);
1248 ctx
->u
.rep
->count
= ctx
->count
-1;
1249 state
->ptr
= ctx
->ptr
;
1252 /* cannot match more repeated items here. make sure the
1254 state
->repeat
= ctx
->u
.rep
->prev
;
1255 DO_JUMP(JUMP_MAX_UNTIL_3
, jump_max_until_3
, ctx
->pattern
);
1256 RETURN_ON_SUCCESS(ret
);
1257 state
->repeat
= ctx
->u
.rep
;
1258 state
->ptr
= ctx
->ptr
;
1261 case SRE_OP_MIN_UNTIL
:
1262 /* minimizing repeat */
1263 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1265 ctx
->u
.rep
= state
->repeat
;
1267 RETURN_ERROR(SRE_ERROR_STATE
);
1269 state
->ptr
= ctx
->ptr
;
1271 ctx
->count
= ctx
->u
.rep
->count
+1;
1273 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx
->pattern
,
1274 ctx
->ptr
, ctx
->count
, ctx
->u
.rep
->pattern
));
1276 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1277 /* not enough matches */
1278 ctx
->u
.rep
->count
= ctx
->count
;
1279 DO_JUMP(JUMP_MIN_UNTIL_1
, jump_min_until_1
,
1280 ctx
->u
.rep
->pattern
+3);
1282 RETURN_ON_ERROR(ret
);
1285 ctx
->u
.rep
->count
= ctx
->count
-1;
1286 state
->ptr
= ctx
->ptr
;
1292 /* see if the tail matches */
1293 state
->repeat
= ctx
->u
.rep
->prev
;
1294 DO_JUMP(JUMP_MIN_UNTIL_2
, jump_min_until_2
, ctx
->pattern
);
1296 RETURN_ON_ERROR(ret
);
1300 state
->repeat
= ctx
->u
.rep
;
1301 state
->ptr
= ctx
->ptr
;
1305 if (ctx
->count
>= ctx
->u
.rep
->pattern
[2]
1306 && ctx
->u
.rep
->pattern
[2] != 65535)
1309 ctx
->u
.rep
->count
= ctx
->count
;
1310 DO_JUMP(JUMP_MIN_UNTIL_3
,jump_min_until_3
,
1311 ctx
->u
.rep
->pattern
+3);
1313 RETURN_ON_ERROR(ret
);
1316 ctx
->u
.rep
->count
= ctx
->count
-1;
1317 state
->ptr
= ctx
->ptr
;
1320 case SRE_OP_GROUPREF
:
1321 /* match backreference */
1322 TRACE(("|%p|%p|GROUPREF %d\n", ctx
->pattern
,
1323 ctx
->ptr
, ctx
->pattern
[0]));
1324 i
= ctx
->pattern
[0];
1326 Py_ssize_t groupref
= i
+i
;
1327 if (groupref
>= state
->lastmark
) {
1330 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1331 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1332 if (!p
|| !e
|| e
< p
)
1335 if (ctx
->ptr
>= end
|| *ctx
->ptr
!= *p
)
1344 case SRE_OP_GROUPREF_IGNORE
:
1345 /* match backreference */
1346 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx
->pattern
,
1347 ctx
->ptr
, ctx
->pattern
[0]));
1348 i
= ctx
->pattern
[0];
1350 Py_ssize_t groupref
= i
+i
;
1351 if (groupref
>= state
->lastmark
) {
1354 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1355 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1356 if (!p
|| !e
|| e
< p
)
1359 if (ctx
->ptr
>= end
||
1360 state
->lower(*ctx
->ptr
) != state
->lower(*p
))
1369 case SRE_OP_GROUPREF_EXISTS
:
1370 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx
->pattern
,
1371 ctx
->ptr
, ctx
->pattern
[0]));
1372 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1373 i
= ctx
->pattern
[0];
1375 Py_ssize_t groupref
= i
+i
;
1376 if (groupref
>= state
->lastmark
) {
1377 ctx
->pattern
+= ctx
->pattern
[1];
1380 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1381 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1382 if (!p
|| !e
|| e
< p
) {
1383 ctx
->pattern
+= ctx
->pattern
[1];
1392 /* assert subpattern */
1393 /* <ASSERT> <skip> <back> <pattern> */
1394 TRACE(("|%p|%p|ASSERT %d\n", ctx
->pattern
,
1395 ctx
->ptr
, ctx
->pattern
[1]));
1396 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1397 if (state
->ptr
< state
->beginning
)
1399 DO_JUMP(JUMP_ASSERT
, jump_assert
, ctx
->pattern
+2);
1400 RETURN_ON_FAILURE(ret
);
1401 ctx
->pattern
+= ctx
->pattern
[0];
1404 case SRE_OP_ASSERT_NOT
:
1405 /* assert not subpattern */
1406 /* <ASSERT_NOT> <skip> <back> <pattern> */
1407 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx
->pattern
,
1408 ctx
->ptr
, ctx
->pattern
[1]));
1409 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1410 if (state
->ptr
>= state
->beginning
) {
1411 DO_JUMP(JUMP_ASSERT_NOT
, jump_assert_not
, ctx
->pattern
+2);
1413 RETURN_ON_ERROR(ret
);
1417 ctx
->pattern
+= ctx
->pattern
[0];
1420 case SRE_OP_FAILURE
:
1421 /* immediate failure */
1422 TRACE(("|%p|%p|FAILURE\n", ctx
->pattern
, ctx
->ptr
));
1426 TRACE(("|%p|%p|UNKNOWN %d\n", ctx
->pattern
, ctx
->ptr
,
1428 RETURN_ERROR(SRE_ERROR_ILLEGAL
);
1433 ctx_pos
= ctx
->last_ctx_pos
;
1435 DATA_POP_DISCARD(ctx
);
1438 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1441 case JUMP_MAX_UNTIL_2
:
1442 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1443 goto jump_max_until_2
;
1444 case JUMP_MAX_UNTIL_3
:
1445 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1446 goto jump_max_until_3
;
1447 case JUMP_MIN_UNTIL_2
:
1448 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1449 goto jump_min_until_2
;
1450 case JUMP_MIN_UNTIL_3
:
1451 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1452 goto jump_min_until_3
;
1454 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx
->pattern
, ctx
->ptr
));
1456 case JUMP_MAX_UNTIL_1
:
1457 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1458 goto jump_max_until_1
;
1459 case JUMP_MIN_UNTIL_1
:
1460 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1461 goto jump_min_until_1
;
1463 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx
->pattern
, ctx
->ptr
));
1465 case JUMP_REPEAT_ONE_1
:
1466 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx
->pattern
, ctx
->ptr
));
1467 goto jump_repeat_one_1
;
1468 case JUMP_REPEAT_ONE_2
:
1469 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx
->pattern
, ctx
->ptr
));
1470 goto jump_repeat_one_2
;
1471 case JUMP_MIN_REPEAT_ONE
:
1472 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx
->pattern
, ctx
->ptr
));
1473 goto jump_min_repeat_one
;
1475 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx
->pattern
, ctx
->ptr
));
1477 case JUMP_ASSERT_NOT
:
1478 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx
->pattern
, ctx
->ptr
));
1479 goto jump_assert_not
;
1481 TRACE(("|%p|%p|RETURN %d\n", ctx
->pattern
, ctx
->ptr
, ret
));
1485 return ret
; /* should never get here */
1489 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1491 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->start
;
1492 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
1493 Py_ssize_t status
= 0;
1494 Py_ssize_t prefix_len
= 0;
1495 Py_ssize_t prefix_skip
= 0;
1496 SRE_CODE
* prefix
= NULL
;
1497 SRE_CODE
* charset
= NULL
;
1498 SRE_CODE
* overlap
= NULL
;
1501 if (pattern
[0] == SRE_OP_INFO
) {
1502 /* optimization info block */
1503 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1507 if (pattern
[3] > 1) {
1508 /* adjust end point (but make sure we leave at least one
1509 character in there, so literal search will work) */
1510 end
-= pattern
[3]-1;
1515 if (flags
& SRE_INFO_PREFIX
) {
1516 /* pattern starts with a known prefix */
1517 /* <length> <skip> <prefix data> <overlap data> */
1518 prefix_len
= pattern
[5];
1519 prefix_skip
= pattern
[6];
1520 prefix
= pattern
+ 7;
1521 overlap
= prefix
+ prefix_len
- 1;
1522 } else if (flags
& SRE_INFO_CHARSET
)
1523 /* pattern starts with a character from a known set */
1525 charset
= pattern
+ 5;
1527 pattern
+= 1 + pattern
[1];
1530 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1531 TRACE(("charset = %p\n", charset
));
1533 #if defined(USE_FAST_SEARCH)
1534 if (prefix_len
> 1) {
1535 /* pattern starts with a known prefix. use the overlap
1536 table to skip forward as fast as we possibly can */
1538 end
= (SRE_CHAR
*)state
->end
;
1541 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1547 if (++i
== prefix_len
) {
1548 /* found a potential match */
1549 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1550 state
->start
= ptr
+ 1 - prefix_len
;
1551 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1552 if (flags
& SRE_INFO_LITERAL
)
1553 return 1; /* we got all of it */
1554 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
);
1557 /* close but no cigar -- try again */
1569 if (pattern
[0] == SRE_OP_LITERAL
) {
1570 /* pattern starts with a literal character. this is used
1571 for short prefixes, and if fast search is disabled */
1572 SRE_CODE chr
= pattern
[1];
1573 end
= (SRE_CHAR
*)state
->end
;
1575 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1579 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1582 if (flags
& SRE_INFO_LITERAL
)
1583 return 1; /* we got all of it */
1584 status
= SRE_MATCH(state
, pattern
+ 2);
1588 } else if (charset
) {
1589 /* pattern starts with a character from a known set */
1590 end
= (SRE_CHAR
*)state
->end
;
1592 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1596 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1599 status
= SRE_MATCH(state
, pattern
);
1606 while (ptr
<= end
) {
1607 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1608 state
->start
= state
->ptr
= ptr
++;
1609 status
= SRE_MATCH(state
, pattern
);
1618 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, Py_ssize_t len
)
1620 /* check if given string is a literal template (i.e. no escapes) */
1627 #if !defined(SRE_RECURSIVE)
1629 /* -------------------------------------------------------------------- */
1630 /* factories and destructors */
1632 /* see sre.h for object declarations */
1633 static PyObject
*pattern_new_match(PatternObject
*, SRE_STATE
*, int);
1634 static PyObject
*pattern_scanner(PatternObject
*, PyObject
*);
1637 sre_codesize(PyObject
* self
, PyObject
*unused
)
1639 return Py_BuildValue("l", sizeof(SRE_CODE
));
1643 sre_getlower(PyObject
* self
, PyObject
* args
)
1645 int character
, flags
;
1646 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1648 if (flags
& SRE_FLAG_LOCALE
)
1649 return Py_BuildValue("i", sre_lower_locale(character
));
1650 if (flags
& SRE_FLAG_UNICODE
)
1651 #if defined(HAVE_UNICODE)
1652 return Py_BuildValue("i", sre_lower_unicode(character
));
1654 return Py_BuildValue("i", sre_lower_locale(character
));
1656 return Py_BuildValue("i", sre_lower(character
));
1660 state_reset(SRE_STATE
* state
)
1662 /* FIXME: dynamic! */
1663 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1665 state
->lastmark
= -1;
1666 state
->lastindex
= -1;
1668 state
->repeat
= NULL
;
1670 data_stack_dealloc(state
);
1674 getstring(PyObject
* string
, Py_ssize_t
* p_length
, int* p_charsize
)
1676 /* given a python object, return a data pointer, a length (in
1677 characters), and a character size. return NULL if the object
1678 is not a string (or not compatible) */
1680 PyBufferProcs
*buffer
;
1681 Py_ssize_t size
, bytes
;
1685 #if defined(HAVE_UNICODE)
1686 if (PyUnicode_Check(string
)) {
1687 /* unicode strings doesn't always support the buffer interface */
1688 ptr
= (void*) PyUnicode_AS_DATA(string
);
1689 bytes
= PyUnicode_GET_DATA_SIZE(string
);
1690 size
= PyUnicode_GET_SIZE(string
);
1691 charsize
= sizeof(Py_UNICODE
);
1696 /* get pointer to string buffer */
1697 buffer
= Py_TYPE(string
)->tp_as_buffer
;
1698 if (!buffer
|| !buffer
->bf_getreadbuffer
|| !buffer
->bf_getsegcount
||
1699 buffer
->bf_getsegcount(string
, NULL
) != 1) {
1700 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1704 /* determine buffer size */
1705 bytes
= buffer
->bf_getreadbuffer(string
, 0, &ptr
);
1707 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1711 /* determine character size */
1712 #if PY_VERSION_HEX >= 0x01060000
1713 size
= PyObject_Size(string
);
1715 size
= PyObject_Length(string
);
1718 if (PyString_Check(string
) || bytes
== size
)
1720 #if defined(HAVE_UNICODE)
1721 else if (bytes
== (Py_ssize_t
) (size
* sizeof(Py_UNICODE
)))
1722 charsize
= sizeof(Py_UNICODE
);
1725 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1729 #if defined(HAVE_UNICODE)
1734 *p_charsize
= charsize
;
1740 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1741 Py_ssize_t start
, Py_ssize_t end
)
1743 /* prepare state object */
1749 memset(state
, 0, sizeof(SRE_STATE
));
1751 state
->lastmark
= -1;
1752 state
->lastindex
= -1;
1754 ptr
= getstring(string
, &length
, &charsize
);
1758 /* adjust boundaries */
1761 else if (start
> length
)
1766 else if (end
> length
)
1769 state
->charsize
= charsize
;
1771 state
->beginning
= ptr
;
1773 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1774 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1777 state
->string
= string
;
1779 state
->endpos
= end
;
1781 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1782 state
->lower
= sre_lower_locale
;
1783 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1784 #if defined(HAVE_UNICODE)
1785 state
->lower
= sre_lower_unicode
;
1787 state
->lower
= sre_lower_locale
;
1790 state
->lower
= sre_lower
;
1796 state_fini(SRE_STATE
* state
)
1798 Py_XDECREF(state
->string
);
1799 data_stack_dealloc(state
);
1802 /* calculate offset from start of string */
1803 #define STATE_OFFSET(state, member)\
1804 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1807 state_getslice(SRE_STATE
* state
, Py_ssize_t index
, PyObject
* string
, int empty
)
1811 index
= (index
- 1) * 2;
1813 if (string
== Py_None
|| index
>= state
->lastmark
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1815 /* want empty string */
1822 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1823 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1826 return PySequence_GetSlice(string
, i
, j
);
1830 pattern_error(int status
)
1833 case SRE_ERROR_RECURSION_LIMIT
:
1836 "maximum recursion limit exceeded"
1839 case SRE_ERROR_MEMORY
:
1842 case SRE_ERROR_INTERRUPTED
:
1843 /* An exception has already been raised, so let it fly */
1846 /* other error codes indicate compiler/engine bugs */
1849 "internal error in regular expression engine"
1855 pattern_dealloc(PatternObject
* self
)
1857 if (self
->weakreflist
!= NULL
)
1858 PyObject_ClearWeakRefs((PyObject
*) self
);
1859 Py_XDECREF(self
->pattern
);
1860 Py_XDECREF(self
->groupindex
);
1861 Py_XDECREF(self
->indexgroup
);
1866 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1872 Py_ssize_t start
= 0;
1873 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1874 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1875 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:match", kwlist
,
1876 &string
, &start
, &end
))
1879 string
= state_init(&state
, self
, string
, start
, end
);
1883 state
.ptr
= state
.start
;
1885 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
1887 if (state
.charsize
== 1) {
1888 status
= sre_match(&state
, PatternObject_GetCode(self
));
1890 #if defined(HAVE_UNICODE)
1891 status
= sre_umatch(&state
, PatternObject_GetCode(self
));
1895 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1896 if (PyErr_Occurred())
1901 return pattern_new_match(self
, &state
, status
);
1905 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1911 Py_ssize_t start
= 0;
1912 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1913 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1914 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:search", kwlist
,
1915 &string
, &start
, &end
))
1918 string
= state_init(&state
, self
, string
, start
, end
);
1922 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
1924 if (state
.charsize
== 1) {
1925 status
= sre_search(&state
, PatternObject_GetCode(self
));
1927 #if defined(HAVE_UNICODE)
1928 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1932 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1936 if (PyErr_Occurred())
1939 return pattern_new_match(self
, &state
, status
);
1943 call(char* module
, char* function
, PyObject
* args
)
1952 name
= PyString_FromString(module
);
1955 mod
= PyImport_Import(name
);
1959 func
= PyObject_GetAttrString(mod
, function
);
1963 result
= PyObject_CallObject(func
, args
);
1969 #ifdef USE_BUILTIN_COPY
1971 deepcopy(PyObject
** object
, PyObject
* memo
)
1977 PyTuple_Pack(2, *object
, memo
)
1985 return 1; /* success */
1990 join_list(PyObject
* list
, PyObject
* string
)
1992 /* join list elements */
1995 #if PY_VERSION_HEX >= 0x01060000
2001 joiner
= PySequence_GetSlice(string
, 0, 0);
2005 if (PyList_GET_SIZE(list
) == 0) {
2010 #if PY_VERSION_HEX >= 0x01060000
2011 function
= PyObject_GetAttrString(joiner
, "join");
2016 args
= PyTuple_New(1);
2018 Py_DECREF(function
);
2022 PyTuple_SET_ITEM(args
, 0, list
);
2023 result
= PyObject_CallObject(function
, args
);
2024 Py_DECREF(args
); /* also removes list */
2025 Py_DECREF(function
);
2029 PyTuple_Pack(2, list
, joiner
)
2038 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2046 Py_ssize_t start
= 0;
2047 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2048 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
2049 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:findall", kwlist
,
2050 &string
, &start
, &end
))
2053 string
= state_init(&state
, self
, string
, start
, end
);
2057 list
= PyList_New(0);
2063 while (state
.start
<= state
.end
) {
2067 state_reset(&state
);
2069 state
.ptr
= state
.start
;
2071 if (state
.charsize
== 1) {
2072 status
= sre_search(&state
, PatternObject_GetCode(self
));
2074 #if defined(HAVE_UNICODE)
2075 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2079 if (PyErr_Occurred())
2085 pattern_error(status
);
2089 /* don't bother to build a match object */
2090 switch (self
->groups
) {
2092 b
= STATE_OFFSET(&state
, state
.start
);
2093 e
= STATE_OFFSET(&state
, state
.ptr
);
2094 item
= PySequence_GetSlice(string
, b
, e
);
2099 item
= state_getslice(&state
, 1, string
, 1);
2104 item
= PyTuple_New(self
->groups
);
2107 for (i
= 0; i
< self
->groups
; i
++) {
2108 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
2113 PyTuple_SET_ITEM(item
, i
, o
);
2118 status
= PyList_Append(list
, item
);
2123 if (state
.ptr
== state
.start
)
2124 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2126 state
.start
= state
.ptr
;
2140 #if PY_VERSION_HEX >= 0x02020000
2142 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
2148 scanner
= pattern_scanner(pattern
, args
);
2152 search
= PyObject_GetAttrString(scanner
, "search");
2157 iterator
= PyCallIter_New(search
, Py_None
);
2165 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2176 Py_ssize_t maxsplit
= 0;
2177 static char* kwlist
[] = { "source", "maxsplit", NULL
};
2178 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|n:split", kwlist
,
2179 &string
, &maxsplit
))
2182 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2186 list
= PyList_New(0);
2195 while (!maxsplit
|| n
< maxsplit
) {
2197 state_reset(&state
);
2199 state
.ptr
= state
.start
;
2201 if (state
.charsize
== 1) {
2202 status
= sre_search(&state
, PatternObject_GetCode(self
));
2204 #if defined(HAVE_UNICODE)
2205 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2209 if (PyErr_Occurred())
2215 pattern_error(status
);
2219 if (state
.start
== state
.ptr
) {
2220 if (last
== state
.end
)
2222 /* skip one character */
2223 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2227 /* get segment before this match */
2228 item
= PySequence_GetSlice(
2229 string
, STATE_OFFSET(&state
, last
),
2230 STATE_OFFSET(&state
, state
.start
)
2234 status
= PyList_Append(list
, item
);
2239 /* add groups (if any) */
2240 for (i
= 0; i
< self
->groups
; i
++) {
2241 item
= state_getslice(&state
, i
+1, string
, 0);
2244 status
= PyList_Append(list
, item
);
2252 last
= state
.start
= state
.ptr
;
2256 /* get segment following last match (even if empty) */
2257 item
= PySequence_GetSlice(
2258 string
, STATE_OFFSET(&state
, last
), state
.endpos
2262 status
= PyList_Append(list
, item
);
2278 pattern_subx(PatternObject
* self
, PyObject
* ptemplate
, PyObject
* string
,
2279 Py_ssize_t count
, Py_ssize_t subn
)
2292 int filter_is_callable
;
2294 if (PyCallable_Check(ptemplate
)) {
2295 /* sub/subn takes either a function or a template */
2298 filter_is_callable
= 1;
2300 /* if not callable, check if it's a literal string */
2302 ptr
= getstring(ptemplate
, &n
, &bint
);
2306 literal
= sre_literal_template((unsigned char *)ptr
, n
);
2308 #if defined(HAVE_UNICODE)
2309 literal
= sre_uliteral_template((Py_UNICODE
*)ptr
, n
);
2319 filter_is_callable
= 0;
2321 /* not a literal; hand it over to the template compiler */
2323 SRE_PY_MODULE
, "_subx",
2324 PyTuple_Pack(2, self
, ptemplate
)
2328 filter_is_callable
= PyCallable_Check(filter
);
2332 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2338 list
= PyList_New(0);
2347 while (!count
|| n
< count
) {
2349 state_reset(&state
);
2351 state
.ptr
= state
.start
;
2353 if (state
.charsize
== 1) {
2354 status
= sre_search(&state
, PatternObject_GetCode(self
));
2356 #if defined(HAVE_UNICODE)
2357 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2361 if (PyErr_Occurred())
2367 pattern_error(status
);
2371 b
= STATE_OFFSET(&state
, state
.start
);
2372 e
= STATE_OFFSET(&state
, state
.ptr
);
2375 /* get segment before this match */
2376 item
= PySequence_GetSlice(string
, i
, b
);
2379 status
= PyList_Append(list
, item
);
2384 } else if (i
== b
&& i
== e
&& n
> 0)
2385 /* ignore empty match on latest position */
2388 if (filter_is_callable
) {
2389 /* pass match object through filter */
2390 match
= pattern_new_match(self
, &state
, 1);
2393 args
= PyTuple_Pack(1, match
);
2398 item
= PyObject_CallObject(filter
, args
);
2404 /* filter is literal string */
2410 if (item
!= Py_None
) {
2411 status
= PyList_Append(list
, item
);
2422 if (state
.ptr
== state
.start
)
2423 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2425 state
.start
= state
.ptr
;
2429 /* get segment following last match */
2430 if (i
< state
.endpos
) {
2431 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2434 status
= PyList_Append(list
, item
);
2444 /* convert list to single string (also removes list) */
2445 item
= join_list(list
, string
);
2451 return Py_BuildValue("Ni", item
, n
);
2464 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2466 PyObject
* ptemplate
;
2468 Py_ssize_t count
= 0;
2469 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2470 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:sub", kwlist
,
2471 &ptemplate
, &string
, &count
))
2474 return pattern_subx(self
, ptemplate
, string
, count
, 0);
2478 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2480 PyObject
* ptemplate
;
2482 Py_ssize_t count
= 0;
2483 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2484 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:subn", kwlist
,
2485 &ptemplate
, &string
, &count
))
2488 return pattern_subx(self
, ptemplate
, string
, count
, 1);
2492 pattern_copy(PatternObject
* self
, PyObject
*unused
)
2494 #ifdef USE_BUILTIN_COPY
2495 PatternObject
* copy
;
2498 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
2502 offset
= offsetof(PatternObject
, groups
);
2504 Py_XINCREF(self
->groupindex
);
2505 Py_XINCREF(self
->indexgroup
);
2506 Py_XINCREF(self
->pattern
);
2508 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2509 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2510 copy
->weakreflist
= NULL
;
2512 return (PyObject
*) copy
;
2514 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
2520 pattern_deepcopy(PatternObject
* self
, PyObject
* memo
)
2522 #ifdef USE_BUILTIN_COPY
2523 PatternObject
* copy
;
2525 copy
= (PatternObject
*) pattern_copy(self
);
2529 if (!deepcopy(©
->groupindex
, memo
) ||
2530 !deepcopy(©
->indexgroup
, memo
) ||
2531 !deepcopy(©
->pattern
, memo
)) {
2537 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
2542 PyDoc_STRVAR(pattern_match_doc
,
2543 "match(string[, pos[, endpos]]) --> match object or None.\n\
2544 Matches zero or more characters at the beginning of the string");
2546 PyDoc_STRVAR(pattern_search_doc
,
2547 "search(string[, pos[, endpos]]) --> match object or None.\n\
2548 Scan through string looking for a match, and return a corresponding\n\
2549 MatchObject instance. Return None if no position in the string matches.");
2551 PyDoc_STRVAR(pattern_split_doc
,
2552 "split(string[, maxsplit = 0]) --> list.\n\
2553 Split string by the occurrences of pattern.");
2555 PyDoc_STRVAR(pattern_findall_doc
,
2556 "findall(string[, pos[, endpos]]) --> list.\n\
2557 Return a list of all non-overlapping matches of pattern in string.");
2559 PyDoc_STRVAR(pattern_finditer_doc
,
2560 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2561 Return an iterator over all non-overlapping matches for the \n\
2562 RE pattern in string. For each match, the iterator returns a\n\
2565 PyDoc_STRVAR(pattern_sub_doc
,
2566 "sub(repl, string[, count = 0]) --> newstring\n\
2567 Return the string obtained by replacing the leftmost non-overlapping\n\
2568 occurrences of pattern in string by the replacement repl.");
2570 PyDoc_STRVAR(pattern_subn_doc
,
2571 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2572 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2573 the leftmost non-overlapping occurrences of pattern with the\n\
2574 replacement repl.");
2576 PyDoc_STRVAR(pattern_doc
, "Compiled regular expression objects");
2578 static PyMethodDef pattern_methods
[] = {
2579 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
,
2581 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
,
2582 pattern_search_doc
},
2583 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
,
2585 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
,
2587 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
,
2589 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
,
2590 pattern_findall_doc
},
2591 #if PY_VERSION_HEX >= 0x02020000
2592 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
,
2593 pattern_finditer_doc
},
2595 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2596 {"__copy__", (PyCFunction
) pattern_copy
, METH_NOARGS
},
2597 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_O
},
2602 pattern_getattr(PatternObject
* self
, char* name
)
2606 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2614 if (!strcmp(name
, "pattern")) {
2615 Py_INCREF(self
->pattern
);
2616 return self
->pattern
;
2619 if (!strcmp(name
, "flags"))
2620 return Py_BuildValue("i", self
->flags
);
2622 if (!strcmp(name
, "groups"))
2623 return Py_BuildValue("i", self
->groups
);
2625 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2626 Py_INCREF(self
->groupindex
);
2627 return self
->groupindex
;
2630 PyErr_SetString(PyExc_AttributeError
, name
);
2634 statichere PyTypeObject Pattern_Type
= {
2635 PyObject_HEAD_INIT(NULL
)
2636 0, "_" SRE_MODULE
".SRE_Pattern",
2637 sizeof(PatternObject
), sizeof(SRE_CODE
),
2638 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2640 (getattrfunc
)pattern_getattr
, /*tp_getattr*/
2644 0, /* tp_as_number */
2645 0, /* tp_as_sequence */
2646 0, /* tp_as_mapping */
2650 0, /* tp_getattro */
2651 0, /* tp_setattro */
2652 0, /* tp_as_buffer */
2653 Py_TPFLAGS_HAVE_WEAKREFS
, /* tp_flags */
2654 pattern_doc
, /* tp_doc */
2655 0, /* tp_traverse */
2657 0, /* tp_richcompare */
2658 offsetof(PatternObject
, weakreflist
), /* tp_weaklistoffset */
2662 _compile(PyObject
* self_
, PyObject
* args
)
2664 /* "compile" pattern descriptor to pattern object */
2666 PatternObject
* self
;
2672 Py_ssize_t groups
= 0;
2673 PyObject
* groupindex
= NULL
;
2674 PyObject
* indexgroup
= NULL
;
2675 if (!PyArg_ParseTuple(args
, "OiO!|nOO", &pattern
, &flags
,
2676 &PyList_Type
, &code
, &groups
,
2677 &groupindex
, &indexgroup
))
2680 n
= PyList_GET_SIZE(code
);
2681 /* coverity[ampersand_in_size] */
2682 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
2688 for (i
= 0; i
< n
; i
++) {
2689 PyObject
*o
= PyList_GET_ITEM(code
, i
);
2690 unsigned long value
= PyInt_Check(o
) ? (unsigned long)PyInt_AsLong(o
)
2691 : PyLong_AsUnsignedLong(o
);
2692 self
->code
[i
] = (SRE_CODE
) value
;
2693 if ((unsigned long) self
->code
[i
] != value
) {
2694 PyErr_SetString(PyExc_OverflowError
,
2695 "regular expression code size limit exceeded");
2700 if (PyErr_Occurred()) {
2706 self
->pattern
= pattern
;
2708 self
->flags
= flags
;
2710 self
->groups
= groups
;
2712 Py_XINCREF(groupindex
);
2713 self
->groupindex
= groupindex
;
2715 Py_XINCREF(indexgroup
);
2716 self
->indexgroup
= indexgroup
;
2718 self
->weakreflist
= NULL
;
2720 return (PyObject
*) self
;
2723 /* -------------------------------------------------------------------- */
2727 match_dealloc(MatchObject
* self
)
2729 Py_XDECREF(self
->regs
);
2730 Py_XDECREF(self
->string
);
2731 Py_DECREF(self
->pattern
);
2736 match_getslice_by_index(MatchObject
* self
, Py_ssize_t index
, PyObject
* def
)
2738 if (index
< 0 || index
>= self
->groups
) {
2739 /* raise IndexError if we were given a bad group number */
2749 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2750 /* return default value if the string or group is undefined */
2755 return PySequence_GetSlice(
2756 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
2761 match_getindex(MatchObject
* self
, PyObject
* index
)
2765 if (PyInt_Check(index
))
2766 return PyInt_AsSsize_t(index
);
2770 if (self
->pattern
->groupindex
) {
2771 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2773 if (PyInt_Check(index
) || PyLong_Check(index
))
2774 i
= PyInt_AsSsize_t(index
);
2784 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2786 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
2790 match_expand(MatchObject
* self
, PyObject
* ptemplate
)
2792 /* delegate to Python code */
2794 SRE_PY_MODULE
, "_expand",
2795 PyTuple_Pack(3, self
->pattern
, self
, ptemplate
)
2800 match_group(MatchObject
* self
, PyObject
* args
)
2805 size
= PyTuple_GET_SIZE(args
);
2809 result
= match_getslice(self
, Py_False
, Py_None
);
2812 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2815 /* fetch multiple items */
2816 result
= PyTuple_New(size
);
2819 for (i
= 0; i
< size
; i
++) {
2820 PyObject
* item
= match_getslice(
2821 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2827 PyTuple_SET_ITEM(result
, i
, item
);
2835 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2840 PyObject
* def
= Py_None
;
2841 static char* kwlist
[] = { "default", NULL
};
2842 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2845 result
= PyTuple_New(self
->groups
-1);
2849 for (index
= 1; index
< self
->groups
; index
++) {
2851 item
= match_getslice_by_index(self
, index
, def
);
2856 PyTuple_SET_ITEM(result
, index
-1, item
);
2863 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2869 PyObject
* def
= Py_None
;
2870 static char* kwlist
[] = { "default", NULL
};
2871 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2874 result
= PyDict_New();
2875 if (!result
|| !self
->pattern
->groupindex
)
2878 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2882 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2886 key
= PyList_GET_ITEM(keys
, index
);
2889 value
= match_getslice(self
, key
, def
);
2894 status
= PyDict_SetItem(result
, key
, value
);
2911 match_start(MatchObject
* self
, PyObject
* args
)
2915 PyObject
* index_
= Py_False
; /* zero */
2916 if (!PyArg_UnpackTuple(args
, "start", 0, 1, &index_
))
2919 index
= match_getindex(self
, index_
);
2921 if (index
< 0 || index
>= self
->groups
) {
2929 /* mark is -1 if group is undefined */
2930 return Py_BuildValue("i", self
->mark
[index
*2]);
2934 match_end(MatchObject
* self
, PyObject
* args
)
2938 PyObject
* index_
= Py_False
; /* zero */
2939 if (!PyArg_UnpackTuple(args
, "end", 0, 1, &index_
))
2942 index
= match_getindex(self
, index_
);
2944 if (index
< 0 || index
>= self
->groups
) {
2952 /* mark is -1 if group is undefined */
2953 return Py_BuildValue("i", self
->mark
[index
*2+1]);
/* Helper: build the 2-tuple (i1, i2) from two Py_ssize_t values.
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, failure handling, final return), so
   only the visible statements are described here. */
2957 _pair(Py_ssize_t i1
, Py_ssize_t i2
)
2962 pair
= PyTuple_New(2);
/* Slot 0 holds i1 converted to a Python int. */
2966 item
= PyInt_FromSsize_t(i1
);
2969 PyTuple_SET_ITEM(pair
, 0, item
);
/* Slot 1 holds i2 converted to a Python int. */
2971 item
= PyInt_FromSsize_t(i2
);
2974 PyTuple_SET_ITEM(pair
, 1, item
);
/* match.span([group]): return the (start, end) pair stored for a group,
   built by _pair().  The optional argument is resolved through
   match_getindex(); when it is omitted, Py_False stands in for group 0.
   NOTE(review): this listing omits some of the function's original lines
   (return type, declaration of `index`, the bodies of the two error
   branches). */
2984 match_span(MatchObject
* self
, PyObject
* args
)
2988 PyObject
* index_
= Py_False
; /* zero */
2989 if (!PyArg_UnpackTuple(args
, "span", 0, 1, &index_
))
2992 index
= match_getindex(self
, index_
);
/* Reject indices outside [0, groups). */
2994 if (index
< 0 || index
>= self
->groups
) {
3002 /* marks are -1 if group is undefined */
/* Group `index` keeps its start in mark[2*index] and its end in
   mark[2*index+1]. */
3003 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
/* Build a tuple holding one (start, end) pair per group, group 0
   included.
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, failure handling, final return), so
   only the visible statements are described here. */
3007 match_regs(MatchObject
* self
)
3013 regs
= PyTuple_New(self
->groups
);
3017 for (index
= 0; index
< self
->groups
; index
++) {
/* Group `index` keeps its start in mark[2*index] and its end in
   mark[2*index+1]. */
3018 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
3023 PyTuple_SET_ITEM(regs
, index
, item
);
/* match.__copy__(): when USE_BUILTIN_COPY is defined, allocate a second
   match object and copy this one's fields into it; the TypeError call at
   the end is presumably the branch taken when the macro is not defined
   (the #else/#endif lines are not visible in this listing).
   `unused` is not referenced by any visible statement.
   NOTE(review): this listing omits some of the function's original lines
   (return type, declaration of `copy`, allocation-failure handling). */
3033 match_copy(MatchObject
* self
, PyObject
*unused
)
3035 #ifdef USE_BUILTIN_COPY
3037 Py_ssize_t slots
, offset
;
/* Two mark slots per group, plus two for group 0: the same count that
   pattern_new_match() allocates. */
3039 slots
= 2 * (self
->pattern
->groups
+1);
3041 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
3045 /* this value is a constant, but any compiler should be able to
3046 figure that out all by itself */
3047 offset
= offsetof(MatchObject
, string
);
/* The object pointers are duplicated by the raw memcpy below, so each
   referenced object gains one extra owner first. */
3049 Py_XINCREF(self
->pattern
);
3050 Py_XINCREF(self
->string
);
3051 Py_XINCREF(self
->regs
);
/* Copy everything from the `string` field to the end of the variable-size
   mark array; the bytes before `string` are left as the allocator set
   them. */
3053 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
3054 sizeof(MatchObject
) + slots
* sizeof(Py_ssize_t
) - offset
);
3056 return (PyObject
*) copy
;
3058 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
/* match.__deepcopy__(memo): when USE_BUILTIN_COPY is defined, make a
   shallow copy with match_copy() and then deep-copy its pattern, string
   and regs members through deepcopy(); the TypeError call at the end is
   presumably the branch taken when the macro is not defined (the
   #else/#endif lines are not visible in this listing).
   NOTE(review): this listing omits some of the function's original lines
   (return type, declaration of `copy`, failure handling, final return);
   they are left untouched. */
3064 match_deepcopy(MatchObject
* self
, PyObject
* memo
)
3066 #ifdef USE_BUILTIN_COPY
/* match_copy() takes (self, unused); pass NULL for the unused argument so
   the call matches its definition. */
3069 copy
= (MatchObject
*) match_copy(self
, NULL);
/* deepcopy() is handed the address of each member of the new object
   ("&copy" had been mis-encoded as a copyright sign). */
3073 if (!deepcopy((PyObject
**) &copy
->pattern
, memo
) ||
3074 !deepcopy(&copy
->string
, memo
) ||
3075 !deepcopy(&copy
->regs
, memo
)) {
3081 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
/* Method table for match objects; match_getattr() looks names up here
   through Py_FindMethod().  Each entry gives the Python-visible name, the
   C implementation and its calling convention.
   NOTE(review): the terminating sentinel entry and closing brace are not
   visible in this listing. */
3086 static PyMethodDef match_methods
[] = {
3087 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
3088 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
3089 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
3090 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
/* groups() and groupdict() also accept their argument by keyword. */
3091 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
3092 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
3093 {"expand", (PyCFunction
) match_expand
, METH_O
},
3094 {"__copy__", (PyCFunction
) match_copy
, METH_NOARGS
},
3095 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_O
},
/* tp_getattr slot for match objects: look `name` up in match_methods
   first, then among the data attributes "lastindex", "lastgroup",
   "string", "regs", "re", "pos" and "endpos"; any other name ends in
   AttributeError.
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, several fall-back returns and closing
   braces), so only the visible statements are described here. */
3100 match_getattr(MatchObject
* self
, char* name
)
3104 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
/* lastindex is only reported when it is non-negative.
   NOTE(review): "i" formats a C int; confirm the declared type of
   lastindex (and of pos/endpos below) -- if they are Py_ssize_t the
   format should be "n". */
3110 if (!strcmp(name
, "lastindex")) {
3111 if (self
->lastindex
>= 0)
3112 return Py_BuildValue("i", self
->lastindex
);
/* lastgroup: index the pattern's indexgroup sequence with lastindex, when
   both are available. */
3117 if (!strcmp(name
, "lastgroup")) {
3118 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
3119 PyObject
* result
= PySequence_GetItem(
3120 self
->pattern
->indexgroup
, self
->lastindex
/* string: hand out a new reference to the stored string object. */
3130 if (!strcmp(name
, "string")) {
3132 Py_INCREF(self
->string
);
3133 return self
->string
;
/* regs: both a reference to self->regs and a fresh match_regs() call are
   visible; the condition choosing between them is not shown here. */
3140 if (!strcmp(name
, "regs")) {
3142 Py_INCREF(self
->regs
);
3145 return match_regs(self
);
/* re: hand out a new reference to the pattern object. */
3148 if (!strcmp(name
, "re")) {
3149 Py_INCREF(self
->pattern
);
3150 return (PyObject
*) self
->pattern
;
3153 if (!strcmp(name
, "pos"))
3154 return Py_BuildValue("i", self
->pos
);
3156 if (!strcmp(name
, "endpos"))
3157 return Py_BuildValue("i", self
->endpos
);
/* Unknown attribute: the attribute name itself is the error message. */
3159 PyErr_SetString(PyExc_AttributeError
, name
);
3163 /* FIXME: implement setattr("string", None) as a special case (to
3164 detach the associated string, if any) */
/* Type object for match objects.
   NOTE(review): only some initializer slots are visible in this listing;
   the remaining slots and the closing brace are not shown. */
3166 statichere PyTypeObject Match_Type
= {
/* ob_type starts out NULL and is patched to &PyType_Type in init_sre(). */
3167 PyObject_HEAD_INIT(NULL
)
/* Type name is "_" SRE_MODULE ".SRE_Match". */
3168 0, "_" SRE_MODULE
".SRE_Match",
/* Variable-size object: basic size plus one Py_ssize_t per extra item. */
3169 sizeof(MatchObject
), sizeof(Py_ssize_t
),
3170 (destructor
)match_dealloc
, /*tp_dealloc*/
3172 (getattrfunc
)match_getattr
/*tp_getattr*/
/* Turn the outcome of a matching run into a Python-level result.  The
   visible code builds a MatchObject from `state`; a separate branch
   handles status == 0 and a final one reports an internal error through
   pattern_error(status).
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, the opening test on `status`, the
   body of the status == 0 branch, else lines, returns), so only the
   visible statements are described here. */
3176 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
3178 /* create match object (from state object) */
3187 /* create match object (with room for extra group marks) */
3188 /* coverity[ampersand_in_size] */
/* Two mark slots per pattern group, plus two for group 0. */
3189 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
3190 2*(pattern
->groups
+1));
/* NOTE(review): a Py_INCREF(pattern) is expected to accompany this store;
   it is not visible in this listing -- confirm. */
3195 match
->pattern
= pattern
;
3197 Py_INCREF(state
->string
);
3198 match
->string
= state
->string
;
/* match->groups includes group 0, hence the +1. */
3201 match
->groups
= pattern
->groups
+1;
3203 /* fill in group slices */
/* Marks are stored as offsets from state->beginning, expressed in units
   of state->charsize bytes. */
3205 base
= (char*) state
->beginning
;
3206 n
= state
->charsize
;
/* Group 0 runs from state->start to state->ptr. */
3208 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
3209 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
/* i counts pattern groups; j indexes the state's mark array, two entries
   per group.  The match object's slots are shifted by two because slots 0
   and 1 hold group 0. */
3211 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
/* A group is filled in only when both of its marks lie at or below
   state->lastmark and both pointers are non-NULL. */
3212 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
3213 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
3214 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
/* Otherwise both slots get -1 (presumably the else arm; the "else" line
   is not visible here). */
3216 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
3218 match
->pos
= state
->pos
;
3219 match
->endpos
= state
->endpos
;
3221 match
->lastindex
= state
->lastindex
;
3223 return (PyObject
*) match
;
3225 } else if (status
== 0) {
3233 /* internal error */
3234 pattern_error(status
);
3239 /* -------------------------------------------------------------------- */
3240 /* scanner methods (experimental) */
/* Destructor for scanner objects: finalize the embedded SRE state and
   drop the reference held on the pattern.
   NOTE(review): the return type, braces and any call that frees the
   object itself are not visible in this listing. */
3243 scanner_dealloc(ScannerObject
* self
)
3245 state_fini(&self
->state
);
3246 Py_DECREF(self
->pattern
);
/* scanner.match(): run the matcher from the scanner's current start
   position, wrap the outcome with pattern_new_match() and move the start
   position forward for the next call.  `unused` is not referenced by any
   visible statement.
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, else lines, the remaining arguments
   of the pattern_new_match() call, returns), so only the visible
   statements are described here. */
3251 scanner_match(ScannerObject
* self
, PyObject
*unused
)
3253 SRE_STATE
* state
= &self
->state
;
/* Each attempt begins at the stored start position. */
3259 state
->ptr
= state
->start
;
/* One-byte characters use sre_match(); the sre_umatch() variant is only
   compiled in when HAVE_UNICODE is defined. */
3261 if (state
->charsize
== 1) {
3262 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
));
3264 #if defined(HAVE_UNICODE)
3265 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
));
/* A pending Python exception is checked before a result is built; the
   guarded statement is not visible here. */
3268 if (PyErr_Occurred())
3271 match
= pattern_new_match((PatternObject
*) self
->pattern
,
/* Pick the next start position: when nothing matched, or ptr did not move
   (an empty match), step one character past ptr.  The second assignment
   resumes exactly at ptr -- presumably the else arm, whose "else" line is
   not visible here. */
3274 if (status
== 0 || state
->ptr
== state
->start
)
3275 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3277 state
->start
= state
->ptr
;
/* scanner.search(): run the search routine from the scanner's current
   start position, wrap the outcome with pattern_new_match() and move the
   start position forward for the next call.  `unused` is not referenced
   by any visible statement.
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, else lines, the remaining arguments
   of the pattern_new_match() call, returns), so only the visible
   statements are described here. */
3284 scanner_search(ScannerObject
* self
, PyObject
*unused
)
3286 SRE_STATE
* state
= &self
->state
;
/* Each attempt begins at the stored start position. */
3292 state
->ptr
= state
->start
;
/* One-byte characters use sre_search(); the sre_usearch() variant is only
   compiled in when HAVE_UNICODE is defined. */
3294 if (state
->charsize
== 1) {
3295 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
3297 #if defined(HAVE_UNICODE)
3298 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
/* A pending Python exception is checked before a result is built; the
   guarded statement is not visible here. */
3301 if (PyErr_Occurred())
3304 match
= pattern_new_match((PatternObject
*) self
->pattern
,
/* Pick the next start position: when nothing matched, or ptr did not move
   (an empty match), step one character past ptr.  The second assignment
   resumes exactly at ptr -- presumably the else arm, whose "else" line is
   not visible here. */
3307 if (status
== 0 || state
->ptr
== state
->start
)
3308 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3310 state
->start
= state
->ptr
;
/* Method table for scanner objects; scanner_getattr() looks names up here
   through Py_FindMethod().  Both methods take no arguments.
   NOTE(review): the terminating sentinel entry and closing brace are not
   visible in this listing. */
3315 static PyMethodDef scanner_methods
[] = {
3316 {"match", (PyCFunction
) scanner_match
, METH_NOARGS
},
3317 {"search", (PyCFunction
) scanner_search
, METH_NOARGS
},
/* tp_getattr slot for scanner objects: look `name` up in scanner_methods
   first, then serve the "pattern" attribute; any other name ends in
   AttributeError.
   NOTE(review): this listing omits some of the function's original lines
   (return type, local declarations, handling of the Py_FindMethod()
   result, returns), so only the visible statements are described here. */
3322 scanner_getattr(ScannerObject
* self
, char* name
)
3326 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
/* pattern: hand out a new reference to the stored pattern object. */
3333 if (!strcmp(name
, "pattern")) {
3334 Py_INCREF(self
->pattern
);
3335 return self
->pattern
;
/* Unknown attribute: the attribute name itself is the error message. */
3338 PyErr_SetString(PyExc_AttributeError
, name
);
/* Type object for scanner objects.
   NOTE(review): only some initializer slots are visible in this listing;
   the remaining slots and the closing brace are not shown. */
3342 statichere PyTypeObject Scanner_Type
= {
/* ob_type starts out NULL and is patched to &PyType_Type in init_sre(). */
3343 PyObject_HEAD_INIT(NULL
)
/* Type name is "_" SRE_MODULE ".SRE_Scanner". */
3344 0, "_" SRE_MODULE
".SRE_Scanner",
/* Fixed-size object: item size is 0. */
3345 sizeof(ScannerObject
), 0,
3346 (destructor
)scanner_dealloc
, /*tp_dealloc*/
3348 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
/* pattern.scanner(string[, start[, end]]): allocate a ScannerObject,
   initialise its embedded SRE state for `string` through state_init() and
   attach the pattern to it.
   NOTE(review): this listing omits some of the function's original lines
   (return type, declaration of `string`, failure handling after the
   parse, allocation and state_init() calls), so only the visible
   statements are described here. */
3352 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
3354 /* create search state object */
3356 ScannerObject
* self
;
/* Without explicit bounds the range is 0 .. PY_SSIZE_T_MAX. */
3359 Py_ssize_t start
= 0;
3360 Py_ssize_t end
= PY_SSIZE_T_MAX
;
/* "n" parses straight into Py_ssize_t. */
3361 if (!PyArg_ParseTuple(args
, "O|nn:scanner", &string
, &start
, &end
))
3364 /* create scanner object */
3365 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
/* state_init()'s return value replaces `string`; how that value is
   checked or released is not visible here. */
3369 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
/* NOTE(review): scanner_dealloc() does Py_DECREF(self->pattern), so a
   matching Py_INCREF(pattern) is expected next to this store; it is not
   visible in this listing -- confirm. */
3376 self
->pattern
= (PyObject
*) pattern
;
3378 return (PyObject
*) self
;
/* Module-level function table handed to Py_InitModule() in init_sre().
   NOTE(review): the terminating sentinel entry and closing brace are not
   visible in this listing. */
3381 static PyMethodDef _functions
[] = {
3382 {"compile", _compile
, METH_VARARGS
},
3383 {"getcodesize", sre_codesize
, METH_NOARGS
},
3384 {"getlower", sre_getlower
, METH_VARARGS
},
/* Module initialisation entry point.  Two spellings of the declaration
   are visible: DL_EXPORT(void) under the PY_VERSION_HEX < 0x02030000
   test, and PyMODINIT_FUNC -- presumably the other branch, whose
   #else/#endif lines are not visible in this listing.
   NOTE(review): this listing omits some of the function's original lines
   (local declarations, NULL checks, release of `x`, closing brace), so
   only the visible statements are described here. */
3388 #if PY_VERSION_HEX < 0x02030000
3389 DL_EXPORT(void) init_sre(void)
3391 PyMODINIT_FUNC
init_sre(void)
3398 /* Patch object types */
/* The three static type objects are declared with a NULL ob_type
   (Match_Type and Scanner_Type via PyObject_HEAD_INIT(NULL)); it is
   filled in here at run time. */
3399 Pattern_Type
.ob_type
= Match_Type
.ob_type
=
3400 Scanner_Type
.ob_type
= &PyType_Type
;
/* Create the module "_" SRE_MODULE with the functions in _functions. */
3402 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
3405 d
= PyModule_GetDict(m
);
/* Publish SRE_MAGIC as the module attribute MAGIC. */
3407 x
= PyInt_FromLong(SRE_MAGIC
);
3409 PyDict_SetItemString(d
, "MAGIC", x
);
/* Publish sizeof(SRE_CODE) as the module attribute CODESIZE. */
3413 x
= PyInt_FromLong(sizeof(SRE_CODE
));
3415 PyDict_SetItemString(d
, "CODESIZE", x
);
/* Publish the copyright string as the module attribute copyright. */
3419 x
= PyString_FromString(copyright
);
3421 PyDict_SetItemString(d
, "copyright", x
);
3426 #endif /* !defined(SRE_RECURSIVE) */