2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
32 * Portions of this engine have been developed in cooperation with
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
34 * other compatibility work.
39 static char copyright
[] =
40 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
42 #define PY_SSIZE_T_CLEAN
45 #include "structmember.h" /* offsetof */
51 /* name of this module, minus the leading underscore */
52 #if !defined(SRE_MODULE)
53 #define SRE_MODULE "sre"
56 #define SRE_PY_MODULE "re"
58 /* defining this one enables tracing */
61 #if PY_VERSION_HEX >= 0x01060000
62 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
63 /* defining this enables unicode support (default under 1.6a1 and later) */
68 /* -------------------------------------------------------------------- */
69 /* optional features */
71 /* enables fast searching */
72 #define USE_FAST_SEARCH
74 /* enables aggressive inlining (always on for Visual C) */
77 /* enables copy/deepcopy handling (work in progress) */
78 #undef USE_BUILTIN_COPY
80 #if PY_VERSION_HEX < 0x01060000
81 #define PyObject_DEL(op) PyMem_DEL((op))
84 /* -------------------------------------------------------------------- */
87 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
89 /* fastest possible local call under MSVC */
90 #define LOCAL(type) static __inline type __fastcall
91 #elif defined(USE_INLINE)
92 #define LOCAL(type) static inline type
94 #define LOCAL(type) static type
98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
99 #define SRE_ERROR_STATE -2 /* illegal state */
100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
101 #define SRE_ERROR_MEMORY -9 /* out of memory */
104 #define TRACE(v) printf v
109 /* -------------------------------------------------------------------- */
110 /* search engine state */
112 /* default character predicates (run sre_chars.py to regenerate tables) */
114 #define SRE_DIGIT_MASK 1
115 #define SRE_SPACE_MASK 2
116 #define SRE_LINEBREAK_MASK 4
117 #define SRE_ALNUM_MASK 8
118 #define SRE_WORD_MASK 16
120 /* FIXME: this assumes ASCII. create tables in init_sre() instead */
122 static char sre_char_info
[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
123 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
125 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
126 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
127 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
128 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
130 static char sre_char_lower
[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
131 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
132 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
133 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
134 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
135 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
136 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
137 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
138 120, 121, 122, 123, 124, 125, 126, 127 };
140 #define SRE_IS_DIGIT(ch)\
141 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
142 #define SRE_IS_SPACE(ch)\
143 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
144 #define SRE_IS_LINEBREAK(ch)\
145 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
146 #define SRE_IS_ALNUM(ch)\
147 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
148 #define SRE_IS_WORD(ch)\
149 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
151 static unsigned int sre_lower(unsigned int ch
)
153 return ((ch
) < 128 ? (unsigned int)sre_char_lower
[ch
] : ch
);
156 /* locale-specific character predicates */
157 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
158 * warnings when c's type supports only numbers < N+1 */
159 #define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
160 #define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
161 #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
162 #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
163 #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
165 static unsigned int sre_lower_locale(unsigned int ch
)
167 return ((ch
) < 256 ? (unsigned int)tolower((ch
)) : ch
);
170 /* unicode-specific character predicates */
172 #if defined(HAVE_UNICODE)
174 #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
175 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
176 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
177 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
178 #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
180 static unsigned int sre_lower_unicode(unsigned int ch
)
182 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE
)(ch
));
188 sre_category(SRE_CODE category
, unsigned int ch
)
192 case SRE_CATEGORY_DIGIT
:
193 return SRE_IS_DIGIT(ch
);
194 case SRE_CATEGORY_NOT_DIGIT
:
195 return !SRE_IS_DIGIT(ch
);
196 case SRE_CATEGORY_SPACE
:
197 return SRE_IS_SPACE(ch
);
198 case SRE_CATEGORY_NOT_SPACE
:
199 return !SRE_IS_SPACE(ch
);
200 case SRE_CATEGORY_WORD
:
201 return SRE_IS_WORD(ch
);
202 case SRE_CATEGORY_NOT_WORD
:
203 return !SRE_IS_WORD(ch
);
204 case SRE_CATEGORY_LINEBREAK
:
205 return SRE_IS_LINEBREAK(ch
);
206 case SRE_CATEGORY_NOT_LINEBREAK
:
207 return !SRE_IS_LINEBREAK(ch
);
209 case SRE_CATEGORY_LOC_WORD
:
210 return SRE_LOC_IS_WORD(ch
);
211 case SRE_CATEGORY_LOC_NOT_WORD
:
212 return !SRE_LOC_IS_WORD(ch
);
214 #if defined(HAVE_UNICODE)
215 case SRE_CATEGORY_UNI_DIGIT
:
216 return SRE_UNI_IS_DIGIT(ch
);
217 case SRE_CATEGORY_UNI_NOT_DIGIT
:
218 return !SRE_UNI_IS_DIGIT(ch
);
219 case SRE_CATEGORY_UNI_SPACE
:
220 return SRE_UNI_IS_SPACE(ch
);
221 case SRE_CATEGORY_UNI_NOT_SPACE
:
222 return !SRE_UNI_IS_SPACE(ch
);
223 case SRE_CATEGORY_UNI_WORD
:
224 return SRE_UNI_IS_WORD(ch
);
225 case SRE_CATEGORY_UNI_NOT_WORD
:
226 return !SRE_UNI_IS_WORD(ch
);
227 case SRE_CATEGORY_UNI_LINEBREAK
:
228 return SRE_UNI_IS_LINEBREAK(ch
);
229 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
230 return !SRE_UNI_IS_LINEBREAK(ch
);
232 case SRE_CATEGORY_UNI_DIGIT
:
233 return SRE_IS_DIGIT(ch
);
234 case SRE_CATEGORY_UNI_NOT_DIGIT
:
235 return !SRE_IS_DIGIT(ch
);
236 case SRE_CATEGORY_UNI_SPACE
:
237 return SRE_IS_SPACE(ch
);
238 case SRE_CATEGORY_UNI_NOT_SPACE
:
239 return !SRE_IS_SPACE(ch
);
240 case SRE_CATEGORY_UNI_WORD
:
241 return SRE_LOC_IS_WORD(ch
);
242 case SRE_CATEGORY_UNI_NOT_WORD
:
243 return !SRE_LOC_IS_WORD(ch
);
244 case SRE_CATEGORY_UNI_LINEBREAK
:
245 return SRE_IS_LINEBREAK(ch
);
246 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
247 return !SRE_IS_LINEBREAK(ch
);
256 data_stack_dealloc(SRE_STATE
* state
)
258 if (state
->data_stack
) {
259 PyMem_FREE(state
->data_stack
);
260 state
->data_stack
= NULL
;
262 state
->data_stack_size
= state
->data_stack_base
= 0;
266 data_stack_grow(SRE_STATE
* state
, Py_ssize_t size
)
268 Py_ssize_t minsize
, cursize
;
269 minsize
= state
->data_stack_base
+size
;
270 cursize
= state
->data_stack_size
;
271 if (cursize
< minsize
) {
273 cursize
= minsize
+minsize
/4+1024;
274 TRACE(("allocate/grow stack %d\n", cursize
));
275 stack
= PyMem_REALLOC(state
->data_stack
, cursize
);
277 data_stack_dealloc(state
);
278 return SRE_ERROR_MEMORY
;
280 state
->data_stack
= (char *)stack
;
281 state
->data_stack_size
= cursize
;
286 /* generate 8-bit version */
288 #define SRE_CHAR unsigned char
289 #define SRE_AT sre_at
290 #define SRE_COUNT sre_count
291 #define SRE_CHARSET sre_charset
292 #define SRE_INFO sre_info
293 #define SRE_MATCH sre_match
294 #define SRE_MATCH_CONTEXT sre_match_context
295 #define SRE_SEARCH sre_search
296 #define SRE_LITERAL_TEMPLATE sre_literal_template
298 #if defined(HAVE_UNICODE)
300 #define SRE_RECURSIVE
304 #undef SRE_LITERAL_TEMPLATE
307 #undef SRE_MATCH_CONTEXT
314 /* generate 16-bit unicode version */
316 #define SRE_CHAR Py_UNICODE
317 #define SRE_AT sre_uat
318 #define SRE_COUNT sre_ucount
319 #define SRE_CHARSET sre_ucharset
320 #define SRE_INFO sre_uinfo
321 #define SRE_MATCH sre_umatch
322 #define SRE_MATCH_CONTEXT sre_umatch_context
323 #define SRE_SEARCH sre_usearch
324 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
327 #endif /* SRE_RECURSIVE */
329 /* -------------------------------------------------------------------- */
330 /* String matching engine */
332 /* the following section is compiled twice, with different character
336 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
338 /* check if pointer is at given position */
340 Py_ssize_t thisp
, thatp
;
344 case SRE_AT_BEGINNING
:
345 case SRE_AT_BEGINNING_STRING
:
346 return ((void*) ptr
== state
->beginning
);
348 case SRE_AT_BEGINNING_LINE
:
349 return ((void*) ptr
== state
->beginning
||
350 SRE_IS_LINEBREAK((int) ptr
[-1]));
353 return (((void*) (ptr
+1) == state
->end
&&
354 SRE_IS_LINEBREAK((int) ptr
[0])) ||
355 ((void*) ptr
== state
->end
));
357 case SRE_AT_END_LINE
:
358 return ((void*) ptr
== state
->end
||
359 SRE_IS_LINEBREAK((int) ptr
[0]));
361 case SRE_AT_END_STRING
:
362 return ((void*) ptr
== state
->end
);
364 case SRE_AT_BOUNDARY
:
365 if (state
->beginning
== state
->end
)
367 thatp
= ((void*) ptr
> state
->beginning
) ?
368 SRE_IS_WORD((int) ptr
[-1]) : 0;
369 thisp
= ((void*) ptr
< state
->end
) ?
370 SRE_IS_WORD((int) ptr
[0]) : 0;
371 return thisp
!= thatp
;
373 case SRE_AT_NON_BOUNDARY
:
374 if (state
->beginning
== state
->end
)
376 thatp
= ((void*) ptr
> state
->beginning
) ?
377 SRE_IS_WORD((int) ptr
[-1]) : 0;
378 thisp
= ((void*) ptr
< state
->end
) ?
379 SRE_IS_WORD((int) ptr
[0]) : 0;
380 return thisp
== thatp
;
382 case SRE_AT_LOC_BOUNDARY
:
383 if (state
->beginning
== state
->end
)
385 thatp
= ((void*) ptr
> state
->beginning
) ?
386 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
387 thisp
= ((void*) ptr
< state
->end
) ?
388 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
389 return thisp
!= thatp
;
391 case SRE_AT_LOC_NON_BOUNDARY
:
392 if (state
->beginning
== state
->end
)
394 thatp
= ((void*) ptr
> state
->beginning
) ?
395 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
396 thisp
= ((void*) ptr
< state
->end
) ?
397 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
398 return thisp
== thatp
;
400 #if defined(HAVE_UNICODE)
401 case SRE_AT_UNI_BOUNDARY
:
402 if (state
->beginning
== state
->end
)
404 thatp
= ((void*) ptr
> state
->beginning
) ?
405 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
406 thisp
= ((void*) ptr
< state
->end
) ?
407 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
408 return thisp
!= thatp
;
410 case SRE_AT_UNI_NON_BOUNDARY
:
411 if (state
->beginning
== state
->end
)
413 thatp
= ((void*) ptr
> state
->beginning
) ?
414 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
415 thisp
= ((void*) ptr
< state
->end
) ?
416 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
417 return thisp
== thatp
;
426 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
428 /* check if character is a member of the given set */
439 /* <LITERAL> <code> */
445 case SRE_OP_CATEGORY
:
446 /* <CATEGORY> <code> */
447 if (sre_category(set
[0], (int) ch
))
453 if (sizeof(SRE_CODE
) == 2) {
454 /* <CHARSET> <bitmap> (16 bits per code word) */
455 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
460 /* <CHARSET> <bitmap> (32 bits per code word) */
461 if (ch
< 256 && (set
[ch
>> 5] & (1 << (ch
& 31))))
468 /* <RANGE> <lower> <upper> */
469 if (set
[0] <= ch
&& ch
<= set
[1])
478 case SRE_OP_BIGCHARSET
:
479 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
481 Py_ssize_t count
, block
;
484 if (sizeof(SRE_CODE
) == 2) {
485 block
= ((unsigned char*)set
)[ch
>> 8];
487 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
492 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
493 * warnings when c's type supports only numbers < N+1 */
495 block
= ((unsigned char*)set
)[ch
>> 8];
500 (set
[block
*8 + ((ch
& 255)>>5)] & (1 << (ch
& 31))))
508 /* internal error -- there's not much we can do about it
509 here, so let's just pretend it didn't match... */
515 LOCAL(Py_ssize_t
) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
);
518 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, Py_ssize_t maxcount
)
521 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->ptr
;
522 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
526 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
527 end
= ptr
+ maxcount
;
529 switch (pattern
[0]) {
533 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
534 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
539 /* repeated dot wildcard. */
540 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
541 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
546 /* repeated dot wildcard. skip to the end of the target
547 string, and backtrack from there */
548 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
553 /* repeated literal */
555 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
556 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
560 case SRE_OP_LITERAL_IGNORE
:
561 /* repeated literal */
563 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
564 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
568 case SRE_OP_NOT_LITERAL
:
569 /* repeated non-literal */
571 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
572 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
576 case SRE_OP_NOT_LITERAL_IGNORE
:
577 /* repeated non-literal */
579 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
580 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
585 /* repeated single character pattern */
586 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
587 while ((SRE_CHAR
*) state
->ptr
< end
) {
588 i
= SRE_MATCH(state
, pattern
);
594 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
595 (SRE_CHAR
*) state
->ptr
- ptr
));
596 return (SRE_CHAR
*) state
->ptr
- ptr
;
599 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
600 return ptr
- (SRE_CHAR
*) state
->ptr
;
603 #if 0 /* not used in this release */
605 SRE_INFO(SRE_STATE
* state
, SRE_CODE
* pattern
)
607 /* check if an SRE_OP_INFO block matches at the current position.
608 returns the number of SRE_CODE objects to skip if successful, 0
611 SRE_CHAR
* end
= state
->end
;
612 SRE_CHAR
* ptr
= state
->ptr
;
615 /* check minimal length */
616 if (pattern
[3] && (end
- ptr
) < pattern
[3])
619 /* check known prefix */
620 if (pattern
[2] & SRE_INFO_PREFIX
&& pattern
[5] > 1) {
621 /* <length> <skip> <prefix data> <overlap data> */
622 for (i
= 0; i
< pattern
[5]; i
++)
623 if ((SRE_CODE
) ptr
[i
] != pattern
[7 + i
])
625 return pattern
[0] + 2 * pattern
[6];
631 /* The macros below should be used to protect recursive SRE_MATCH()
632 * calls that *failed* and do *not* return immediately (IOW, those
633 * that will backtrack). Explaining:
635 * - Recursive SRE_MATCH() returned true: that's usually a success
636 * (besides atypical cases like ASSERT_NOT), therefore there's no
637 * reason to restore lastmark;
639 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
640 * is returning to the caller: If the current SRE_MATCH() is the
641 * top function of the recursion, returning false will be a matching
642 * failure, and it doesn't matter where lastmark is pointing to.
643 * If it's *not* the top function, it will be a recursive SRE_MATCH()
644 * failure by itself, and the calling SRE_MATCH() will have to deal
645 * with the failure by the same rules explained here (it will restore
646 * lastmark by itself if necessary);
648 * - Recursive SRE_MATCH() returned false, and will continue the
649 * outside 'for' loop: must be protected when breaking, since the next
650 * OP could potentially depend on lastmark;
652 * - Recursive SRE_MATCH() returned false, and will be called again
653 * inside a local for/while loop: must be protected between each
654 * loop iteration, since the recursive SRE_MATCH() could do anything,
655 * and could potentially depend on lastmark.
657 * For more information, check the discussion at SF patch #712900.
659 #define LASTMARK_SAVE() \
661 ctx->lastmark = state->lastmark; \
662 ctx->lastindex = state->lastindex; \
664 #define LASTMARK_RESTORE() \
666 state->lastmark = ctx->lastmark; \
667 state->lastindex = ctx->lastindex; \
670 #define RETURN_ERROR(i) do { return i; } while(0)
671 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
672 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
674 #define RETURN_ON_ERROR(i) \
675 do { if (i < 0) RETURN_ERROR(i); } while (0)
676 #define RETURN_ON_SUCCESS(i) \
677 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
678 #define RETURN_ON_FAILURE(i) \
679 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
683 #define DATA_STACK_ALLOC(state, type, ptr) \
685 alloc_pos = state->data_stack_base; \
686 TRACE(("allocating %s in %d (%d)\n", \
687 SFY(type), alloc_pos, sizeof(type))); \
688 if (state->data_stack_size < alloc_pos+sizeof(type)) { \
689 int j = data_stack_grow(state, sizeof(type)); \
690 if (j < 0) return j; \
692 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
694 ptr = (type*)(state->data_stack+alloc_pos); \
695 state->data_stack_base += sizeof(type); \
698 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
700 TRACE(("looking up %s at %d\n", SFY(type), pos)); \
701 ptr = (type*)(state->data_stack+pos); \
704 #define DATA_STACK_PUSH(state, data, size) \
706 TRACE(("copy data in %p to %d (%d)\n", \
707 data, state->data_stack_base, size)); \
708 if (state->data_stack_size < state->data_stack_base+size) { \
709 int j = data_stack_grow(state, size); \
710 if (j < 0) return j; \
712 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
714 memcpy(state->data_stack+state->data_stack_base, data, size); \
715 state->data_stack_base += size; \
718 #define DATA_STACK_POP(state, data, size, discard) \
720 TRACE(("copy data to %p from %d (%d)\n", \
721 data, state->data_stack_base-size, size)); \
722 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
724 state->data_stack_base -= size; \
727 #define DATA_STACK_POP_DISCARD(state, size) \
729 TRACE(("discard data from %d (%d)\n", \
730 state->data_stack_base-size, size)); \
731 state->data_stack_base -= size; \
734 #define DATA_PUSH(x) \
735 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
736 #define DATA_POP(x) \
737 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
738 #define DATA_POP_DISCARD(x) \
739 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
740 #define DATA_ALLOC(t,p) \
741 DATA_STACK_ALLOC(state, t, p)
742 #define DATA_LOOKUP_AT(t,p,pos) \
743 DATA_STACK_LOOKUP_AT(state,t,p,pos)
745 #define MARK_PUSH(lastmark) \
746 do if (lastmark > 0) { \
747 i = lastmark; /* ctx->lastmark may change if reallocated */ \
748 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
750 #define MARK_POP(lastmark) \
751 do if (lastmark > 0) { \
752 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
754 #define MARK_POP_KEEP(lastmark) \
755 do if (lastmark > 0) { \
756 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
758 #define MARK_POP_DISCARD(lastmark) \
759 do if (lastmark > 0) { \
760 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
764 #define JUMP_MAX_UNTIL_1 1
765 #define JUMP_MAX_UNTIL_2 2
766 #define JUMP_MAX_UNTIL_3 3
767 #define JUMP_MIN_UNTIL_1 4
768 #define JUMP_MIN_UNTIL_2 5
769 #define JUMP_MIN_UNTIL_3 6
770 #define JUMP_REPEAT 7
771 #define JUMP_REPEAT_ONE_1 8
772 #define JUMP_REPEAT_ONE_2 9
773 #define JUMP_MIN_REPEAT_ONE 10
774 #define JUMP_BRANCH 11
775 #define JUMP_ASSERT 12
776 #define JUMP_ASSERT_NOT 13
778 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
779 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
780 nextctx->last_ctx_pos = ctx_pos; \
781 nextctx->jump = jumpvalue; \
782 nextctx->pattern = nextpattern; \
783 ctx_pos = alloc_pos; \
787 while (0) /* gcc doesn't like labels at end of scopes */ \
790 Py_ssize_t last_ctx_pos
;
796 Py_ssize_t lastindex
;
803 /* check if string matches the given pattern. returns <0 for
804 error, 0 for failure, and 1 for success */
806 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
808 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
809 Py_ssize_t alloc_pos
, ctx_pos
= -1;
810 Py_ssize_t i
, ret
= 0;
813 SRE_MATCH_CONTEXT
* ctx
;
814 SRE_MATCH_CONTEXT
* nextctx
;
816 TRACE(("|%p|%p|ENTER\n", pattern
, state
->ptr
));
818 DATA_ALLOC(SRE_MATCH_CONTEXT
, ctx
);
819 ctx
->last_ctx_pos
= -1;
820 ctx
->jump
= JUMP_NONE
;
821 ctx
->pattern
= pattern
;
826 ctx
->ptr
= (SRE_CHAR
*)state
->ptr
;
828 if (ctx
->pattern
[0] == SRE_OP_INFO
) {
829 /* optimization info block */
830 /* <INFO> <1=skip> <2=flags> <3=min> ... */
831 if (ctx
->pattern
[3] && (end
- ctx
->ptr
) < ctx
->pattern
[3]) {
832 TRACE(("reject (got %d chars, need %d)\n",
833 (end
- ctx
->ptr
), ctx
->pattern
[3]));
836 ctx
->pattern
+= ctx
->pattern
[1] + 1;
841 switch (*ctx
->pattern
++) {
846 TRACE(("|%p|%p|MARK %d\n", ctx
->pattern
,
847 ctx
->ptr
, ctx
->pattern
[0]));
850 state
->lastindex
= i
/2 + 1;
851 if (i
> state
->lastmark
) {
852 /* state->lastmark is the highest valid index in the
853 state->mark array. If it is increased by more than 1,
854 the intervening marks must be set to NULL to signal
855 that these marks have not been encountered. */
856 Py_ssize_t j
= state
->lastmark
+ 1;
858 state
->mark
[j
++] = NULL
;
861 state
->mark
[i
] = ctx
->ptr
;
866 /* match literal string */
867 /* <LITERAL> <code> */
868 TRACE(("|%p|%p|LITERAL %d\n", ctx
->pattern
,
869 ctx
->ptr
, *ctx
->pattern
));
870 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] != ctx
->pattern
[0])
876 case SRE_OP_NOT_LITERAL
:
877 /* match anything that is not literal character */
878 /* <NOT_LITERAL> <code> */
879 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx
->pattern
,
880 ctx
->ptr
, *ctx
->pattern
));
881 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] == ctx
->pattern
[0])
889 TRACE(("|%p|%p|SUCCESS\n", ctx
->pattern
, ctx
->ptr
));
890 state
->ptr
= ctx
->ptr
;
894 /* match at given position */
896 TRACE(("|%p|%p|AT %d\n", ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
897 if (!SRE_AT(state
, ctx
->ptr
, *ctx
->pattern
))
902 case SRE_OP_CATEGORY
:
903 /* match at given category */
904 /* <CATEGORY> <code> */
905 TRACE(("|%p|%p|CATEGORY %d\n", ctx
->pattern
,
906 ctx
->ptr
, *ctx
->pattern
));
907 if (ctx
->ptr
>= end
|| !sre_category(ctx
->pattern
[0], ctx
->ptr
[0]))
914 /* match anything (except a newline) */
916 TRACE(("|%p|%p|ANY\n", ctx
->pattern
, ctx
->ptr
));
917 if (ctx
->ptr
>= end
|| SRE_IS_LINEBREAK(ctx
->ptr
[0]))
925 TRACE(("|%p|%p|ANY_ALL\n", ctx
->pattern
, ctx
->ptr
));
932 /* match set member (or non_member) */
933 /* <IN> <skip> <set> */
934 TRACE(("|%p|%p|IN\n", ctx
->pattern
, ctx
->ptr
));
935 if (ctx
->ptr
>= end
|| !SRE_CHARSET(ctx
->pattern
+ 1, *ctx
->ptr
))
937 ctx
->pattern
+= ctx
->pattern
[0];
941 case SRE_OP_LITERAL_IGNORE
:
942 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
943 ctx
->pattern
, ctx
->ptr
, ctx
->pattern
[0]));
944 if (ctx
->ptr
>= end
||
945 state
->lower(*ctx
->ptr
) != state
->lower(*ctx
->pattern
))
951 case SRE_OP_NOT_LITERAL_IGNORE
:
952 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
953 ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
954 if (ctx
->ptr
>= end
||
955 state
->lower(*ctx
->ptr
) == state
->lower(*ctx
->pattern
))
961 case SRE_OP_IN_IGNORE
:
962 TRACE(("|%p|%p|IN_IGNORE\n", ctx
->pattern
, ctx
->ptr
));
964 || !SRE_CHARSET(ctx
->pattern
+1,
965 (SRE_CODE
)state
->lower(*ctx
->ptr
)))
967 ctx
->pattern
+= ctx
->pattern
[0];
974 /* <JUMP> <offset> */
975 TRACE(("|%p|%p|JUMP %d\n", ctx
->pattern
,
976 ctx
->ptr
, ctx
->pattern
[0]));
977 ctx
->pattern
+= ctx
->pattern
[0];
982 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
983 TRACE(("|%p|%p|BRANCH\n", ctx
->pattern
, ctx
->ptr
));
985 ctx
->u
.rep
= state
->repeat
;
987 MARK_PUSH(ctx
->lastmark
);
988 for (; ctx
->pattern
[0]; ctx
->pattern
+= ctx
->pattern
[0]) {
989 if (ctx
->pattern
[1] == SRE_OP_LITERAL
&&
991 (SRE_CODE
) *ctx
->ptr
!= ctx
->pattern
[2]))
993 if (ctx
->pattern
[1] == SRE_OP_IN
&&
995 !SRE_CHARSET(ctx
->pattern
+ 3, (SRE_CODE
) *ctx
->ptr
)))
997 state
->ptr
= ctx
->ptr
;
998 DO_JUMP(JUMP_BRANCH
, jump_branch
, ctx
->pattern
+1);
1001 MARK_POP_DISCARD(ctx
->lastmark
);
1002 RETURN_ON_ERROR(ret
);
1006 MARK_POP_KEEP(ctx
->lastmark
);
1010 MARK_POP_DISCARD(ctx
->lastmark
);
1013 case SRE_OP_REPEAT_ONE
:
1014 /* match repeated sequence (maximizing regexp) */
1016 /* this operator only works if the repeated item is
1017 exactly one character wide, and we're not already
1018 collecting backtracking points. for other cases,
1019 use the MAX_REPEAT operator */
1021 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1023 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1024 ctx
->pattern
[1], ctx
->pattern
[2]));
1026 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1027 RETURN_FAILURE
; /* cannot match */
1029 state
->ptr
= ctx
->ptr
;
1031 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[2]);
1032 RETURN_ON_ERROR(ret
);
1033 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1035 ctx
->ptr
+= ctx
->count
;
1037 /* when we arrive here, count contains the number of
1038 matches, and ctx->ptr points to the tail of the target
1039 string. check if the rest of the pattern matches,
1040 and backtrack if not. */
1042 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1045 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1046 /* tail is empty. we're finished */
1047 state
->ptr
= ctx
->ptr
;
1053 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_LITERAL
) {
1054 /* tail starts with a literal. skip positions where
1055 the rest of the pattern cannot possibly match */
1056 ctx
->u
.chr
= ctx
->pattern
[ctx
->pattern
[0]+1];
1058 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1] &&
1059 (ctx
->ptr
>= end
|| *ctx
->ptr
!= ctx
->u
.chr
)) {
1063 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1065 state
->ptr
= ctx
->ptr
;
1066 DO_JUMP(JUMP_REPEAT_ONE_1
, jump_repeat_one_1
,
1067 ctx
->pattern
+ctx
->pattern
[0]);
1069 RETURN_ON_ERROR(ret
);
1081 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1]) {
1082 state
->ptr
= ctx
->ptr
;
1083 DO_JUMP(JUMP_REPEAT_ONE_2
, jump_repeat_one_2
,
1084 ctx
->pattern
+ctx
->pattern
[0]);
1086 RETURN_ON_ERROR(ret
);
1096 case SRE_OP_MIN_REPEAT_ONE
:
1097 /* match repeated sequence (minimizing regexp) */
1099 /* this operator only works if the repeated item is
1100 exactly one character wide, and we're not already
1101 collecting backtracking points. for other cases,
1102 use the MIN_REPEAT operator */
1104 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1106 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1107 ctx
->pattern
[1], ctx
->pattern
[2]));
1109 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1110 RETURN_FAILURE
; /* cannot match */
1112 state
->ptr
= ctx
->ptr
;
1114 if (ctx
->pattern
[1] == 0)
1117 /* count using pattern min as the maximum */
1118 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[1]);
1119 RETURN_ON_ERROR(ret
);
1120 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1121 if (ret
< (Py_ssize_t
) ctx
->pattern
[1])
1122 /* didn't match minimum number of times */
1124 /* advance past minimum matches of repeat */
1126 ctx
->ptr
+= ctx
->count
;
1129 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1130 /* tail is empty. we're finished */
1131 state
->ptr
= ctx
->ptr
;
1137 while ((Py_ssize_t
)ctx
->pattern
[2] == 65535
1138 || ctx
->count
<= (Py_ssize_t
)ctx
->pattern
[2]) {
1139 state
->ptr
= ctx
->ptr
;
1140 DO_JUMP(JUMP_MIN_REPEAT_ONE
,jump_min_repeat_one
,
1141 ctx
->pattern
+ctx
->pattern
[0]);
1143 RETURN_ON_ERROR(ret
);
1146 state
->ptr
= ctx
->ptr
;
1147 ret
= SRE_COUNT(state
, ctx
->pattern
+3, 1);
1148 RETURN_ON_ERROR(ret
);
1149 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1161 /* create repeat context. all the hard work is done
1162 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1163 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1164 TRACE(("|%p|%p|REPEAT %d %d\n", ctx
->pattern
, ctx
->ptr
,
1165 ctx
->pattern
[1], ctx
->pattern
[2]));
1167 /* install new repeat context */
1168 ctx
->u
.rep
= (SRE_REPEAT
*) PyObject_MALLOC(sizeof(*ctx
->u
.rep
));
1173 ctx
->u
.rep
->count
= -1;
1174 ctx
->u
.rep
->pattern
= ctx
->pattern
;
1175 ctx
->u
.rep
->prev
= state
->repeat
;
1176 ctx
->u
.rep
->last_ptr
= NULL
;
1177 state
->repeat
= ctx
->u
.rep
;
1179 state
->ptr
= ctx
->ptr
;
1180 DO_JUMP(JUMP_REPEAT
, jump_repeat
, ctx
->pattern
+ctx
->pattern
[0]);
1181 state
->repeat
= ctx
->u
.rep
->prev
;
1182 PyObject_FREE(ctx
->u
.rep
);
1185 RETURN_ON_ERROR(ret
);
1190 case SRE_OP_MAX_UNTIL
:
1191 /* maximizing repeat */
1192 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1194 /* FIXME: we probably need to deal with zero-width
1195 matches in here... */
1197 ctx
->u
.rep
= state
->repeat
;
1199 RETURN_ERROR(SRE_ERROR_STATE
);
1201 state
->ptr
= ctx
->ptr
;
1203 ctx
->count
= ctx
->u
.rep
->count
+1;
1205 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx
->pattern
,
1206 ctx
->ptr
, ctx
->count
));
1208 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1209 /* not enough matches */
1210 ctx
->u
.rep
->count
= ctx
->count
;
1211 DO_JUMP(JUMP_MAX_UNTIL_1
, jump_max_until_1
,
1212 ctx
->u
.rep
->pattern
+3);
1214 RETURN_ON_ERROR(ret
);
1217 ctx
->u
.rep
->count
= ctx
->count
-1;
1218 state
->ptr
= ctx
->ptr
;
1222 if ((ctx
->count
< ctx
->u
.rep
->pattern
[2] ||
1223 ctx
->u
.rep
->pattern
[2] == 65535) &&
1224 state
->ptr
!= ctx
->u
.rep
->last_ptr
) {
1225 /* we may have enough matches, but if we can
1226 match another item, do so */
1227 ctx
->u
.rep
->count
= ctx
->count
;
1229 MARK_PUSH(ctx
->lastmark
);
1230 /* zero-width match protection */
1231 DATA_PUSH(&ctx
->u
.rep
->last_ptr
);
1232 ctx
->u
.rep
->last_ptr
= state
->ptr
;
1233 DO_JUMP(JUMP_MAX_UNTIL_2
, jump_max_until_2
,
1234 ctx
->u
.rep
->pattern
+3);
1235 DATA_POP(&ctx
->u
.rep
->last_ptr
);
1237 MARK_POP_DISCARD(ctx
->lastmark
);
1238 RETURN_ON_ERROR(ret
);
1241 MARK_POP(ctx
->lastmark
);
1243 ctx
->u
.rep
->count
= ctx
->count
-1;
1244 state
->ptr
= ctx
->ptr
;
1247 /* cannot match more repeated items here. make sure the
1249 state
->repeat
= ctx
->u
.rep
->prev
;
1250 DO_JUMP(JUMP_MAX_UNTIL_3
, jump_max_until_3
, ctx
->pattern
);
1251 RETURN_ON_SUCCESS(ret
);
1252 state
->repeat
= ctx
->u
.rep
;
1253 state
->ptr
= ctx
->ptr
;
1256 case SRE_OP_MIN_UNTIL
:
1257 /* minimizing repeat */
1258 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1260 ctx
->u
.rep
= state
->repeat
;
1262 RETURN_ERROR(SRE_ERROR_STATE
);
1264 state
->ptr
= ctx
->ptr
;
1266 ctx
->count
= ctx
->u
.rep
->count
+1;
1268 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx
->pattern
,
1269 ctx
->ptr
, ctx
->count
, ctx
->u
.rep
->pattern
));
1271 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1272 /* not enough matches */
1273 ctx
->u
.rep
->count
= ctx
->count
;
1274 DO_JUMP(JUMP_MIN_UNTIL_1
, jump_min_until_1
,
1275 ctx
->u
.rep
->pattern
+3);
1277 RETURN_ON_ERROR(ret
);
1280 ctx
->u
.rep
->count
= ctx
->count
-1;
1281 state
->ptr
= ctx
->ptr
;
1287 /* see if the tail matches */
1288 state
->repeat
= ctx
->u
.rep
->prev
;
1289 DO_JUMP(JUMP_MIN_UNTIL_2
, jump_min_until_2
, ctx
->pattern
);
1291 RETURN_ON_ERROR(ret
);
1295 state
->repeat
= ctx
->u
.rep
;
1296 state
->ptr
= ctx
->ptr
;
1300 if (ctx
->count
>= ctx
->u
.rep
->pattern
[2]
1301 && ctx
->u
.rep
->pattern
[2] != 65535)
1304 ctx
->u
.rep
->count
= ctx
->count
;
1305 DO_JUMP(JUMP_MIN_UNTIL_3
,jump_min_until_3
,
1306 ctx
->u
.rep
->pattern
+3);
1308 RETURN_ON_ERROR(ret
);
1311 ctx
->u
.rep
->count
= ctx
->count
-1;
1312 state
->ptr
= ctx
->ptr
;
1315 case SRE_OP_GROUPREF
:
1316 /* match backreference */
1317 TRACE(("|%p|%p|GROUPREF %d\n", ctx
->pattern
,
1318 ctx
->ptr
, ctx
->pattern
[0]));
1319 i
= ctx
->pattern
[0];
1321 Py_ssize_t groupref
= i
+i
;
1322 if (groupref
>= state
->lastmark
) {
1325 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1326 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1327 if (!p
|| !e
|| e
< p
)
1330 if (ctx
->ptr
>= end
|| *ctx
->ptr
!= *p
)
1339 case SRE_OP_GROUPREF_IGNORE
:
1340 /* match backreference */
1341 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx
->pattern
,
1342 ctx
->ptr
, ctx
->pattern
[0]));
1343 i
= ctx
->pattern
[0];
1345 Py_ssize_t groupref
= i
+i
;
1346 if (groupref
>= state
->lastmark
) {
1349 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1350 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1351 if (!p
|| !e
|| e
< p
)
1354 if (ctx
->ptr
>= end
||
1355 state
->lower(*ctx
->ptr
) != state
->lower(*p
))
1364 case SRE_OP_GROUPREF_EXISTS
:
1365 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx
->pattern
,
1366 ctx
->ptr
, ctx
->pattern
[0]));
1367 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1368 i
= ctx
->pattern
[0];
1370 Py_ssize_t groupref
= i
+i
;
1371 if (groupref
>= state
->lastmark
) {
1372 ctx
->pattern
+= ctx
->pattern
[1];
1375 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1376 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1377 if (!p
|| !e
|| e
< p
) {
1378 ctx
->pattern
+= ctx
->pattern
[1];
1387 /* assert subpattern */
1388 /* <ASSERT> <skip> <back> <pattern> */
1389 TRACE(("|%p|%p|ASSERT %d\n", ctx
->pattern
,
1390 ctx
->ptr
, ctx
->pattern
[1]));
1391 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1392 if (state
->ptr
< state
->beginning
)
1394 DO_JUMP(JUMP_ASSERT
, jump_assert
, ctx
->pattern
+2);
1395 RETURN_ON_FAILURE(ret
);
1396 ctx
->pattern
+= ctx
->pattern
[0];
1399 case SRE_OP_ASSERT_NOT
:
1400 /* assert not subpattern */
1401 /* <ASSERT_NOT> <skip> <back> <pattern> */
1402 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx
->pattern
,
1403 ctx
->ptr
, ctx
->pattern
[1]));
1404 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1405 if (state
->ptr
>= state
->beginning
) {
1406 DO_JUMP(JUMP_ASSERT_NOT
, jump_assert_not
, ctx
->pattern
+2);
1408 RETURN_ON_ERROR(ret
);
1412 ctx
->pattern
+= ctx
->pattern
[0];
1415 case SRE_OP_FAILURE
:
1416 /* immediate failure */
1417 TRACE(("|%p|%p|FAILURE\n", ctx
->pattern
, ctx
->ptr
));
1421 TRACE(("|%p|%p|UNKNOWN %d\n", ctx
->pattern
, ctx
->ptr
,
1423 RETURN_ERROR(SRE_ERROR_ILLEGAL
);
1428 ctx_pos
= ctx
->last_ctx_pos
;
1430 DATA_POP_DISCARD(ctx
);
1433 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1436 case JUMP_MAX_UNTIL_2
:
1437 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1438 goto jump_max_until_2
;
1439 case JUMP_MAX_UNTIL_3
:
1440 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1441 goto jump_max_until_3
;
1442 case JUMP_MIN_UNTIL_2
:
1443 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1444 goto jump_min_until_2
;
1445 case JUMP_MIN_UNTIL_3
:
1446 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1447 goto jump_min_until_3
;
1449 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx
->pattern
, ctx
->ptr
));
1451 case JUMP_MAX_UNTIL_1
:
1452 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1453 goto jump_max_until_1
;
1454 case JUMP_MIN_UNTIL_1
:
1455 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1456 goto jump_min_until_1
;
1458 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx
->pattern
, ctx
->ptr
));
1460 case JUMP_REPEAT_ONE_1
:
1461 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx
->pattern
, ctx
->ptr
));
1462 goto jump_repeat_one_1
;
1463 case JUMP_REPEAT_ONE_2
:
1464 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx
->pattern
, ctx
->ptr
));
1465 goto jump_repeat_one_2
;
1466 case JUMP_MIN_REPEAT_ONE
:
1467 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx
->pattern
, ctx
->ptr
));
1468 goto jump_min_repeat_one
;
1470 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx
->pattern
, ctx
->ptr
));
1472 case JUMP_ASSERT_NOT
:
1473 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx
->pattern
, ctx
->ptr
));
1474 goto jump_assert_not
;
1476 TRACE(("|%p|%p|RETURN %d\n", ctx
->pattern
, ctx
->ptr
, ret
));
1480 return ret
; /* should never get here */
1484 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1486 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->start
;
1487 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
1488 Py_ssize_t status
= 0;
1489 Py_ssize_t prefix_len
= 0;
1490 Py_ssize_t prefix_skip
= 0;
1491 SRE_CODE
* prefix
= NULL
;
1492 SRE_CODE
* charset
= NULL
;
1493 SRE_CODE
* overlap
= NULL
;
1496 if (pattern
[0] == SRE_OP_INFO
) {
1497 /* optimization info block */
1498 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1502 if (pattern
[3] > 1) {
1503 /* adjust end point (but make sure we leave at least one
1504 character in there, so literal search will work) */
1505 end
-= pattern
[3]-1;
1510 if (flags
& SRE_INFO_PREFIX
) {
1511 /* pattern starts with a known prefix */
1512 /* <length> <skip> <prefix data> <overlap data> */
1513 prefix_len
= pattern
[5];
1514 prefix_skip
= pattern
[6];
1515 prefix
= pattern
+ 7;
1516 overlap
= prefix
+ prefix_len
- 1;
1517 } else if (flags
& SRE_INFO_CHARSET
)
1518 /* pattern starts with a character from a known set */
1520 charset
= pattern
+ 5;
1522 pattern
+= 1 + pattern
[1];
1525 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1526 TRACE(("charset = %p\n", charset
));
1528 #if defined(USE_FAST_SEARCH)
1529 if (prefix_len
> 1) {
1530 /* pattern starts with a known prefix. use the overlap
1531 table to skip forward as fast as we possibly can */
1533 end
= (SRE_CHAR
*)state
->end
;
1536 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1542 if (++i
== prefix_len
) {
1543 /* found a potential match */
1544 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1545 state
->start
= ptr
+ 1 - prefix_len
;
1546 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1547 if (flags
& SRE_INFO_LITERAL
)
1548 return 1; /* we got all of it */
1549 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
);
1552 /* close but no cigar -- try again */
1564 if (pattern
[0] == SRE_OP_LITERAL
) {
1565 /* pattern starts with a literal character. this is used
1566 for short prefixes, and if fast search is disabled */
1567 SRE_CODE chr
= pattern
[1];
1568 end
= (SRE_CHAR
*)state
->end
;
1570 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1574 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1577 if (flags
& SRE_INFO_LITERAL
)
1578 return 1; /* we got all of it */
1579 status
= SRE_MATCH(state
, pattern
+ 2);
1583 } else if (charset
) {
1584 /* pattern starts with a character from a known set */
1585 end
= (SRE_CHAR
*)state
->end
;
1587 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1591 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1594 status
= SRE_MATCH(state
, pattern
);
1601 while (ptr
<= end
) {
1602 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1603 state
->start
= state
->ptr
= ptr
++;
1604 status
= SRE_MATCH(state
, pattern
);
1613 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, Py_ssize_t len
)
1615 /* check if given string is a literal template (i.e. no escapes) */
1622 #if !defined(SRE_RECURSIVE)
1624 /* -------------------------------------------------------------------- */
1625 /* factories and destructors */
1627 /* see sre.h for object declarations */
1628 static PyObject
*pattern_new_match(PatternObject
*, SRE_STATE
*, int);
1629 static PyObject
*pattern_scanner(PatternObject
*, PyObject
*);
1632 sre_codesize(PyObject
* self
, PyObject
*unused
)
1634 return Py_BuildValue("l", sizeof(SRE_CODE
));
1638 sre_getlower(PyObject
* self
, PyObject
* args
)
1640 int character
, flags
;
1641 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1643 if (flags
& SRE_FLAG_LOCALE
)
1644 return Py_BuildValue("i", sre_lower_locale(character
));
1645 if (flags
& SRE_FLAG_UNICODE
)
1646 #if defined(HAVE_UNICODE)
1647 return Py_BuildValue("i", sre_lower_unicode(character
));
1649 return Py_BuildValue("i", sre_lower_locale(character
));
1651 return Py_BuildValue("i", sre_lower(character
));
1655 state_reset(SRE_STATE
* state
)
1657 /* FIXME: dynamic! */
1658 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1660 state
->lastmark
= -1;
1661 state
->lastindex
= -1;
1663 state
->repeat
= NULL
;
1665 data_stack_dealloc(state
);
1669 getstring(PyObject
* string
, Py_ssize_t
* p_length
, int* p_charsize
)
1671 /* given a python object, return a data pointer, a length (in
1672 characters), and a character size. return NULL if the object
1673 is not a string (or not compatible) */
1675 PyBufferProcs
*buffer
;
1676 Py_ssize_t size
, bytes
;
1680 #if defined(HAVE_UNICODE)
1681 if (PyUnicode_Check(string
)) {
1682 /* unicode strings doesn't always support the buffer interface */
1683 ptr
= (void*) PyUnicode_AS_DATA(string
);
1684 bytes
= PyUnicode_GET_DATA_SIZE(string
);
1685 size
= PyUnicode_GET_SIZE(string
);
1686 charsize
= sizeof(Py_UNICODE
);
1691 /* get pointer to string buffer */
1692 buffer
= string
->ob_type
->tp_as_buffer
;
1693 if (!buffer
|| !buffer
->bf_getreadbuffer
|| !buffer
->bf_getsegcount
||
1694 buffer
->bf_getsegcount(string
, NULL
) != 1) {
1695 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1699 /* determine buffer size */
1700 bytes
= buffer
->bf_getreadbuffer(string
, 0, &ptr
);
1702 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1706 /* determine character size */
1707 #if PY_VERSION_HEX >= 0x01060000
1708 size
= PyObject_Size(string
);
1710 size
= PyObject_Length(string
);
1713 if (PyString_Check(string
) || bytes
== size
)
1715 #if defined(HAVE_UNICODE)
1716 else if (bytes
== (Py_ssize_t
) (size
* sizeof(Py_UNICODE
)))
1717 charsize
= sizeof(Py_UNICODE
);
1720 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1724 #if defined(HAVE_UNICODE)
1729 *p_charsize
= charsize
;
1735 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1736 Py_ssize_t start
, Py_ssize_t end
)
1738 /* prepare state object */
1744 memset(state
, 0, sizeof(SRE_STATE
));
1746 state
->lastmark
= -1;
1747 state
->lastindex
= -1;
1749 ptr
= getstring(string
, &length
, &charsize
);
1753 /* adjust boundaries */
1756 else if (start
> length
)
1761 else if (end
> length
)
1764 state
->charsize
= charsize
;
1766 state
->beginning
= ptr
;
1768 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1769 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1772 state
->string
= string
;
1774 state
->endpos
= end
;
1776 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1777 state
->lower
= sre_lower_locale
;
1778 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1779 #if defined(HAVE_UNICODE)
1780 state
->lower
= sre_lower_unicode
;
1782 state
->lower
= sre_lower_locale
;
1785 state
->lower
= sre_lower
;
1791 state_fini(SRE_STATE
* state
)
1793 Py_XDECREF(state
->string
);
1794 data_stack_dealloc(state
);
1797 /* calculate offset from start of string */
1798 #define STATE_OFFSET(state, member)\
1799 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1802 state_getslice(SRE_STATE
* state
, Py_ssize_t index
, PyObject
* string
, int empty
)
1806 index
= (index
- 1) * 2;
1808 if (string
== Py_None
|| index
>= state
->lastmark
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1810 /* want empty string */
1817 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1818 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1821 return PySequence_GetSlice(string
, i
, j
);
1825 pattern_error(int status
)
1828 case SRE_ERROR_RECURSION_LIMIT
:
1831 "maximum recursion limit exceeded"
1834 case SRE_ERROR_MEMORY
:
1838 /* other error codes indicate compiler/engine bugs */
1841 "internal error in regular expression engine"
1847 pattern_dealloc(PatternObject
* self
)
1849 if (self
->weakreflist
!= NULL
)
1850 PyObject_ClearWeakRefs((PyObject
*) self
);
1851 Py_XDECREF(self
->pattern
);
1852 Py_XDECREF(self
->groupindex
);
1853 Py_XDECREF(self
->indexgroup
);
1858 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1864 Py_ssize_t start
= 0;
1865 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1866 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1867 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:match", kwlist
,
1868 &string
, &start
, &end
))
1871 string
= state_init(&state
, self
, string
, start
, end
);
1875 state
.ptr
= state
.start
;
1877 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
1879 if (state
.charsize
== 1) {
1880 status
= sre_match(&state
, PatternObject_GetCode(self
));
1882 #if defined(HAVE_UNICODE)
1883 status
= sre_umatch(&state
, PatternObject_GetCode(self
));
1887 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1888 if (PyErr_Occurred())
1893 return pattern_new_match(self
, &state
, status
);
1897 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1903 Py_ssize_t start
= 0;
1904 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1905 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1906 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:search", kwlist
,
1907 &string
, &start
, &end
))
1910 string
= state_init(&state
, self
, string
, start
, end
);
1914 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
1916 if (state
.charsize
== 1) {
1917 status
= sre_search(&state
, PatternObject_GetCode(self
));
1919 #if defined(HAVE_UNICODE)
1920 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1924 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1928 if (PyErr_Occurred())
1931 return pattern_new_match(self
, &state
, status
);
1935 call(char* module
, char* function
, PyObject
* args
)
1944 name
= PyString_FromString(module
);
1947 mod
= PyImport_Import(name
);
1951 func
= PyObject_GetAttrString(mod
, function
);
1955 result
= PyObject_CallObject(func
, args
);
1961 #ifdef USE_BUILTIN_COPY
1963 deepcopy(PyObject
** object
, PyObject
* memo
)
1969 PyTuple_Pack(2, *object
, memo
)
1977 return 1; /* success */
1982 join_list(PyObject
* list
, PyObject
* pattern
)
1984 /* join list elements */
1987 #if PY_VERSION_HEX >= 0x01060000
1993 switch (PyList_GET_SIZE(list
)) {
1996 return PySequence_GetSlice(pattern
, 0, 0);
1998 result
= PyList_GET_ITEM(list
, 0);
2004 /* two or more elements: slice out a suitable separator from the
2005 first member, and use that to join the entire list */
2007 joiner
= PySequence_GetSlice(pattern
, 0, 0);
2011 #if PY_VERSION_HEX >= 0x01060000
2012 function
= PyObject_GetAttrString(joiner
, "join");
2017 args
= PyTuple_New(1);
2019 Py_DECREF(function
);
2023 PyTuple_SET_ITEM(args
, 0, list
);
2024 result
= PyObject_CallObject(function
, args
);
2025 Py_DECREF(args
); /* also removes list */
2026 Py_DECREF(function
);
2030 PyTuple_Pack(2, list
, joiner
)
2039 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2047 Py_ssize_t start
= 0;
2048 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2049 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
2050 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:findall", kwlist
,
2051 &string
, &start
, &end
))
2054 string
= state_init(&state
, self
, string
, start
, end
);
2058 list
= PyList_New(0);
2064 while (state
.start
<= state
.end
) {
2068 state_reset(&state
);
2070 state
.ptr
= state
.start
;
2072 if (state
.charsize
== 1) {
2073 status
= sre_search(&state
, PatternObject_GetCode(self
));
2075 #if defined(HAVE_UNICODE)
2076 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2080 if (PyErr_Occurred())
2086 pattern_error(status
);
2090 /* don't bother to build a match object */
2091 switch (self
->groups
) {
2093 b
= STATE_OFFSET(&state
, state
.start
);
2094 e
= STATE_OFFSET(&state
, state
.ptr
);
2095 item
= PySequence_GetSlice(string
, b
, e
);
2100 item
= state_getslice(&state
, 1, string
, 1);
2105 item
= PyTuple_New(self
->groups
);
2108 for (i
= 0; i
< self
->groups
; i
++) {
2109 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
2114 PyTuple_SET_ITEM(item
, i
, o
);
2119 status
= PyList_Append(list
, item
);
2124 if (state
.ptr
== state
.start
)
2125 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2127 state
.start
= state
.ptr
;
2141 #if PY_VERSION_HEX >= 0x02020000
2143 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
2149 scanner
= pattern_scanner(pattern
, args
);
2153 search
= PyObject_GetAttrString(scanner
, "search");
2158 iterator
= PyCallIter_New(search
, Py_None
);
2166 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2177 Py_ssize_t maxsplit
= 0;
2178 static char* kwlist
[] = { "source", "maxsplit", NULL
};
2179 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|n:split", kwlist
,
2180 &string
, &maxsplit
))
2183 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2187 list
= PyList_New(0);
2196 while (!maxsplit
|| n
< maxsplit
) {
2198 state_reset(&state
);
2200 state
.ptr
= state
.start
;
2202 if (state
.charsize
== 1) {
2203 status
= sre_search(&state
, PatternObject_GetCode(self
));
2205 #if defined(HAVE_UNICODE)
2206 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2210 if (PyErr_Occurred())
2216 pattern_error(status
);
2220 if (state
.start
== state
.ptr
) {
2221 if (last
== state
.end
)
2223 /* skip one character */
2224 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2228 /* get segment before this match */
2229 item
= PySequence_GetSlice(
2230 string
, STATE_OFFSET(&state
, last
),
2231 STATE_OFFSET(&state
, state
.start
)
2235 status
= PyList_Append(list
, item
);
2240 /* add groups (if any) */
2241 for (i
= 0; i
< self
->groups
; i
++) {
2242 item
= state_getslice(&state
, i
+1, string
, 0);
2245 status
= PyList_Append(list
, item
);
2253 last
= state
.start
= state
.ptr
;
2257 /* get segment following last match (even if empty) */
2258 item
= PySequence_GetSlice(
2259 string
, STATE_OFFSET(&state
, last
), state
.endpos
2263 status
= PyList_Append(list
, item
);
2279 pattern_subx(PatternObject
* self
, PyObject
* ptemplate
, PyObject
* string
,
2280 Py_ssize_t count
, Py_ssize_t subn
)
2293 int filter_is_callable
;
2295 if (PyCallable_Check(ptemplate
)) {
2296 /* sub/subn takes either a function or a template */
2299 filter_is_callable
= 1;
2301 /* if not callable, check if it's a literal string */
2303 ptr
= getstring(ptemplate
, &n
, &bint
);
2307 literal
= sre_literal_template((unsigned char *)ptr
, n
);
2309 #if defined(HAVE_UNICODE)
2310 literal
= sre_uliteral_template((Py_UNICODE
*)ptr
, n
);
2320 filter_is_callable
= 0;
2322 /* not a literal; hand it over to the template compiler */
2324 SRE_PY_MODULE
, "_subx",
2325 PyTuple_Pack(2, self
, ptemplate
)
2329 filter_is_callable
= PyCallable_Check(filter
);
2333 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2339 list
= PyList_New(0);
2348 while (!count
|| n
< count
) {
2350 state_reset(&state
);
2352 state
.ptr
= state
.start
;
2354 if (state
.charsize
== 1) {
2355 status
= sre_search(&state
, PatternObject_GetCode(self
));
2357 #if defined(HAVE_UNICODE)
2358 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2362 if (PyErr_Occurred())
2368 pattern_error(status
);
2372 b
= STATE_OFFSET(&state
, state
.start
);
2373 e
= STATE_OFFSET(&state
, state
.ptr
);
2376 /* get segment before this match */
2377 item
= PySequence_GetSlice(string
, i
, b
);
2380 status
= PyList_Append(list
, item
);
2385 } else if (i
== b
&& i
== e
&& n
> 0)
2386 /* ignore empty match on latest position */
2389 if (filter_is_callable
) {
2390 /* pass match object through filter */
2391 match
= pattern_new_match(self
, &state
, 1);
2394 args
= PyTuple_Pack(1, match
);
2399 item
= PyObject_CallObject(filter
, args
);
2405 /* filter is literal string */
2411 if (item
!= Py_None
) {
2412 status
= PyList_Append(list
, item
);
2423 if (state
.ptr
== state
.start
)
2424 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2426 state
.start
= state
.ptr
;
2430 /* get segment following last match */
2431 if (i
< state
.endpos
) {
2432 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2435 status
= PyList_Append(list
, item
);
2445 /* convert list to single string (also removes list) */
2446 item
= join_list(list
, self
->pattern
);
2452 return Py_BuildValue("Ni", item
, n
);
2465 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2467 PyObject
* ptemplate
;
2469 Py_ssize_t count
= 0;
2470 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2471 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:sub", kwlist
,
2472 &ptemplate
, &string
, &count
))
2475 return pattern_subx(self
, ptemplate
, string
, count
, 0);
2479 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2481 PyObject
* ptemplate
;
2483 Py_ssize_t count
= 0;
2484 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2485 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:subn", kwlist
,
2486 &ptemplate
, &string
, &count
))
2489 return pattern_subx(self
, ptemplate
, string
, count
, 1);
2493 pattern_copy(PatternObject
* self
, PyObject
*unused
)
2495 #ifdef USE_BUILTIN_COPY
2496 PatternObject
* copy
;
2499 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
2503 offset
= offsetof(PatternObject
, groups
);
2505 Py_XINCREF(self
->groupindex
);
2506 Py_XINCREF(self
->indexgroup
);
2507 Py_XINCREF(self
->pattern
);
2509 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2510 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2511 copy
->weakreflist
= NULL
;
2513 return (PyObject
*) copy
;
2515 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
2521 pattern_deepcopy(PatternObject
* self
, PyObject
* memo
)
2523 #ifdef USE_BUILTIN_COPY
2524 PatternObject
* copy
;
2526 copy
= (PatternObject
*) pattern_copy(self
);
2530 if (!deepcopy(©
->groupindex
, memo
) ||
2531 !deepcopy(©
->indexgroup
, memo
) ||
2532 !deepcopy(©
->pattern
, memo
)) {
2538 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
2543 PyDoc_STRVAR(pattern_match_doc
,
2544 "match(string[, pos[, endpos]]) --> match object or None.\n\
2545 Matches zero or more characters at the beginning of the string");
2547 PyDoc_STRVAR(pattern_search_doc
,
2548 "search(string[, pos[, endpos]]) --> match object or None.\n\
2549 Scan through string looking for a match, and return a corresponding\n\
2550 MatchObject instance. Return None if no position in the string matches.");
2552 PyDoc_STRVAR(pattern_split_doc
,
2553 "split(string[, maxsplit = 0]) --> list.\n\
2554 Split string by the occurrences of pattern.");
2556 PyDoc_STRVAR(pattern_findall_doc
,
2557 "findall(string[, pos[, endpos]]) --> list.\n\
2558 Return a list of all non-overlapping matches of pattern in string.");
2560 PyDoc_STRVAR(pattern_finditer_doc
,
2561 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2562 Return an iterator over all non-overlapping matches for the \n\
2563 RE pattern in string. For each match, the iterator returns a\n\
2566 PyDoc_STRVAR(pattern_sub_doc
,
2567 "sub(repl, string[, count = 0]) --> newstring\n\
2568 Return the string obtained by replacing the leftmost non-overlapping\n\
2569 occurrences of pattern in string by the replacement repl.");
2571 PyDoc_STRVAR(pattern_subn_doc
,
2572 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2573 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2574 the leftmost non-overlapping occurrences of pattern with the\n\
2575 replacement repl.");
2577 PyDoc_STRVAR(pattern_doc
, "Compiled regular expression objects");
2579 static PyMethodDef pattern_methods
[] = {
2580 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
,
2582 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
,
2583 pattern_search_doc
},
2584 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
,
2586 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
,
2588 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
,
2590 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
,
2591 pattern_findall_doc
},
2592 #if PY_VERSION_HEX >= 0x02020000
2593 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
,
2594 pattern_finditer_doc
},
2596 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2597 {"__copy__", (PyCFunction
) pattern_copy
, METH_NOARGS
},
2598 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_O
},
2603 pattern_getattr(PatternObject
* self
, char* name
)
2607 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2615 if (!strcmp(name
, "pattern")) {
2616 Py_INCREF(self
->pattern
);
2617 return self
->pattern
;
2620 if (!strcmp(name
, "flags"))
2621 return Py_BuildValue("i", self
->flags
);
2623 if (!strcmp(name
, "groups"))
2624 return Py_BuildValue("i", self
->groups
);
2626 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2627 Py_INCREF(self
->groupindex
);
2628 return self
->groupindex
;
2631 PyErr_SetString(PyExc_AttributeError
, name
);
2635 statichere PyTypeObject Pattern_Type
= {
2636 PyObject_HEAD_INIT(NULL
)
2637 0, "_" SRE_MODULE
".SRE_Pattern",
2638 sizeof(PatternObject
), sizeof(SRE_CODE
),
2639 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2641 (getattrfunc
)pattern_getattr
, /*tp_getattr*/
2645 0, /* tp_as_number */
2646 0, /* tp_as_sequence */
2647 0, /* tp_as_mapping */
2651 0, /* tp_getattro */
2652 0, /* tp_setattro */
2653 0, /* tp_as_buffer */
2654 Py_TPFLAGS_HAVE_WEAKREFS
, /* tp_flags */
2655 pattern_doc
, /* tp_doc */
2656 0, /* tp_traverse */
2658 0, /* tp_richcompare */
2659 offsetof(PatternObject
, weakreflist
), /* tp_weaklistoffset */
2663 _compile(PyObject
* self_
, PyObject
* args
)
2665 /* "compile" pattern descriptor to pattern object */
2667 PatternObject
* self
;
2673 Py_ssize_t groups
= 0;
2674 PyObject
* groupindex
= NULL
;
2675 PyObject
* indexgroup
= NULL
;
2676 if (!PyArg_ParseTuple(args
, "OiO!|nOO", &pattern
, &flags
,
2677 &PyList_Type
, &code
, &groups
,
2678 &groupindex
, &indexgroup
))
2681 n
= PyList_GET_SIZE(code
);
2683 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
2689 for (i
= 0; i
< n
; i
++) {
2690 PyObject
*o
= PyList_GET_ITEM(code
, i
);
2691 unsigned long value
= PyInt_Check(o
) ? (unsigned long)PyInt_AsLong(o
)
2692 : PyLong_AsUnsignedLong(o
);
2693 self
->code
[i
] = (SRE_CODE
) value
;
2694 if ((unsigned long) self
->code
[i
] != value
) {
2695 PyErr_SetString(PyExc_OverflowError
,
2696 "regular expression code size limit exceeded");
2701 if (PyErr_Occurred()) {
2707 self
->pattern
= pattern
;
2709 self
->flags
= flags
;
2711 self
->groups
= groups
;
2713 Py_XINCREF(groupindex
);
2714 self
->groupindex
= groupindex
;
2716 Py_XINCREF(indexgroup
);
2717 self
->indexgroup
= indexgroup
;
2719 self
->weakreflist
= NULL
;
2721 return (PyObject
*) self
;
2724 /* -------------------------------------------------------------------- */
2728 match_dealloc(MatchObject
* self
)
2730 Py_XDECREF(self
->regs
);
2731 Py_XDECREF(self
->string
);
2732 Py_DECREF(self
->pattern
);
2737 match_getslice_by_index(MatchObject
* self
, Py_ssize_t index
, PyObject
* def
)
2739 if (index
< 0 || index
>= self
->groups
) {
2740 /* raise IndexError if we were given a bad group number */
2750 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2751 /* return default value if the string or group is undefined */
2756 return PySequence_GetSlice(
2757 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
2762 match_getindex(MatchObject
* self
, PyObject
* index
)
2766 if (PyInt_Check(index
))
2767 return PyInt_AsSsize_t(index
);
2771 if (self
->pattern
->groupindex
) {
2772 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2774 if (PyInt_Check(index
) || PyLong_Check(index
))
2775 i
= PyInt_AsSsize_t(index
);
2785 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2787 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
2791 match_expand(MatchObject
* self
, PyObject
* ptemplate
)
2793 /* delegate to Python code */
2795 SRE_PY_MODULE
, "_expand",
2796 PyTuple_Pack(3, self
->pattern
, self
, ptemplate
)
2801 match_group(MatchObject
* self
, PyObject
* args
)
2806 size
= PyTuple_GET_SIZE(args
);
2810 result
= match_getslice(self
, Py_False
, Py_None
);
2813 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2816 /* fetch multiple items */
2817 result
= PyTuple_New(size
);
2820 for (i
= 0; i
< size
; i
++) {
2821 PyObject
* item
= match_getslice(
2822 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2828 PyTuple_SET_ITEM(result
, i
, item
);
2836 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2841 PyObject
* def
= Py_None
;
2842 static char* kwlist
[] = { "default", NULL
};
2843 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2846 result
= PyTuple_New(self
->groups
-1);
2850 for (index
= 1; index
< self
->groups
; index
++) {
2852 item
= match_getslice_by_index(self
, index
, def
);
2857 PyTuple_SET_ITEM(result
, index
-1, item
);
2864 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2870 PyObject
* def
= Py_None
;
2871 static char* kwlist
[] = { "default", NULL
};
2872 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2875 result
= PyDict_New();
2876 if (!result
|| !self
->pattern
->groupindex
)
2879 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2883 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2887 key
= PyList_GET_ITEM(keys
, index
);
2890 value
= match_getslice(self
, key
, def
);
2895 status
= PyDict_SetItem(result
, key
, value
);
2912 match_start(MatchObject
* self
, PyObject
* args
)
2916 PyObject
* index_
= Py_False
; /* zero */
2917 if (!PyArg_UnpackTuple(args
, "start", 0, 1, &index_
))
2920 index
= match_getindex(self
, index_
);
2922 if (index
< 0 || index
>= self
->groups
) {
2930 /* mark is -1 if group is undefined */
2931 return Py_BuildValue("i", self
->mark
[index
*2]);
2935 match_end(MatchObject
* self
, PyObject
* args
)
2939 PyObject
* index_
= Py_False
; /* zero */
2940 if (!PyArg_UnpackTuple(args
, "end", 0, 1, &index_
))
2943 index
= match_getindex(self
, index_
);
2945 if (index
< 0 || index
>= self
->groups
) {
2953 /* mark is -1 if group is undefined */
2954 return Py_BuildValue("i", self
->mark
[index
*2+1]);
2958 _pair(Py_ssize_t i1
, Py_ssize_t i2
)
2963 pair
= PyTuple_New(2);
2967 item
= PyInt_FromSsize_t(i1
);
2970 PyTuple_SET_ITEM(pair
, 0, item
);
2972 item
= PyInt_FromSsize_t(i2
);
2975 PyTuple_SET_ITEM(pair
, 1, item
);
2985 match_span(MatchObject
* self
, PyObject
* args
)
2989 PyObject
* index_
= Py_False
; /* zero */
2990 if (!PyArg_UnpackTuple(args
, "span", 0, 1, &index_
))
2993 index
= match_getindex(self
, index_
);
2995 if (index
< 0 || index
>= self
->groups
) {
3003 /* marks are -1 if group is undefined */
3004 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
3008 match_regs(MatchObject
* self
)
3014 regs
= PyTuple_New(self
->groups
);
3018 for (index
= 0; index
< self
->groups
; index
++) {
3019 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
3024 PyTuple_SET_ITEM(regs
, index
, item
);
3034 match_copy(MatchObject
* self
, PyObject
*unused
)
3036 #ifdef USE_BUILTIN_COPY
3038 Py_ssize_t slots
, offset
;
3040 slots
= 2 * (self
->pattern
->groups
+1);
3042 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
3046 /* this value a constant, but any compiler should be able to
3047 figure that out all by itself */
3048 offset
= offsetof(MatchObject
, string
);
3050 Py_XINCREF(self
->pattern
);
3051 Py_XINCREF(self
->string
);
3052 Py_XINCREF(self
->regs
);
3054 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
3055 sizeof(MatchObject
) + slots
* sizeof(Py_ssize_t
) - offset
);
3057 return (PyObject
*) copy
;
3059 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
3065 match_deepcopy(MatchObject
* self
, PyObject
* memo
)
3067 #ifdef USE_BUILTIN_COPY
3070 copy
= (MatchObject
*) match_copy(self
);
3074 if (!deepcopy((PyObject
**) ©
->pattern
, memo
) ||
3075 !deepcopy(©
->string
, memo
) ||
3076 !deepcopy(©
->regs
, memo
)) {
3082 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
3087 static PyMethodDef match_methods
[] = {
3088 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
3089 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
3090 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
3091 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
3092 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
3093 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
3094 {"expand", (PyCFunction
) match_expand
, METH_O
},
3095 {"__copy__", (PyCFunction
) match_copy
, METH_NOARGS
},
3096 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_O
},
3101 match_getattr(MatchObject
* self
, char* name
)
3105 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
3111 if (!strcmp(name
, "lastindex")) {
3112 if (self
->lastindex
>= 0)
3113 return Py_BuildValue("i", self
->lastindex
);
3118 if (!strcmp(name
, "lastgroup")) {
3119 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
3120 PyObject
* result
= PySequence_GetItem(
3121 self
->pattern
->indexgroup
, self
->lastindex
3131 if (!strcmp(name
, "string")) {
3133 Py_INCREF(self
->string
);
3134 return self
->string
;
3141 if (!strcmp(name
, "regs")) {
3143 Py_INCREF(self
->regs
);
3146 return match_regs(self
);
3149 if (!strcmp(name
, "re")) {
3150 Py_INCREF(self
->pattern
);
3151 return (PyObject
*) self
->pattern
;
3154 if (!strcmp(name
, "pos"))
3155 return Py_BuildValue("i", self
->pos
);
3157 if (!strcmp(name
, "endpos"))
3158 return Py_BuildValue("i", self
->endpos
);
3160 PyErr_SetString(PyExc_AttributeError
, name
);
3164 /* FIXME: implement setattr("string", None) as a special case (to
3165 detach the associated string, if any */
3167 statichere PyTypeObject Match_Type
= {
3168 PyObject_HEAD_INIT(NULL
)
3169 0, "_" SRE_MODULE
".SRE_Match",
3170 sizeof(MatchObject
), sizeof(Py_ssize_t
),
3171 (destructor
)match_dealloc
, /*tp_dealloc*/
3173 (getattrfunc
)match_getattr
/*tp_getattr*/
3177 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
3179 /* create match object (from state object) */
3188 /* create match object (with room for extra group marks) */
3189 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
3190 2*(pattern
->groups
+1));
3195 match
->pattern
= pattern
;
3197 Py_INCREF(state
->string
);
3198 match
->string
= state
->string
;
3201 match
->groups
= pattern
->groups
+1;
3203 /* fill in group slices */
3205 base
= (char*) state
->beginning
;
3206 n
= state
->charsize
;
3208 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
3209 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
3211 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
3212 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
3213 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
3214 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
3216 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
3218 match
->pos
= state
->pos
;
3219 match
->endpos
= state
->endpos
;
3221 match
->lastindex
= state
->lastindex
;
3223 return (PyObject
*) match
;
3225 } else if (status
== 0) {
3233 /* internal error */
3234 pattern_error(status
);
3239 /* -------------------------------------------------------------------- */
3240 /* scanner methods (experimental) */
3243 scanner_dealloc(ScannerObject
* self
)
3245 state_fini(&self
->state
);
3246 Py_DECREF(self
->pattern
);
3251 scanner_match(ScannerObject
* self
, PyObject
*unused
)
3253 SRE_STATE
* state
= &self
->state
;
3259 state
->ptr
= state
->start
;
3261 if (state
->charsize
== 1) {
3262 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
));
3264 #if defined(HAVE_UNICODE)
3265 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
));
3268 if (PyErr_Occurred())
3271 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3274 if (status
== 0 || state
->ptr
== state
->start
)
3275 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3277 state
->start
= state
->ptr
;
3284 scanner_search(ScannerObject
* self
, PyObject
*unused
)
3286 SRE_STATE
* state
= &self
->state
;
3292 state
->ptr
= state
->start
;
3294 if (state
->charsize
== 1) {
3295 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
3297 #if defined(HAVE_UNICODE)
3298 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
3301 if (PyErr_Occurred())
3304 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3307 if (status
== 0 || state
->ptr
== state
->start
)
3308 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3310 state
->start
= state
->ptr
;
3315 static PyMethodDef scanner_methods
[] = {
3316 {"match", (PyCFunction
) scanner_match
, METH_NOARGS
},
3317 {"search", (PyCFunction
) scanner_search
, METH_NOARGS
},
3322 scanner_getattr(ScannerObject
* self
, char* name
)
3326 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
3333 if (!strcmp(name
, "pattern")) {
3334 Py_INCREF(self
->pattern
);
3335 return self
->pattern
;
3338 PyErr_SetString(PyExc_AttributeError
, name
);
3342 statichere PyTypeObject Scanner_Type
= {
3343 PyObject_HEAD_INIT(NULL
)
3344 0, "_" SRE_MODULE
".SRE_Scanner",
3345 sizeof(ScannerObject
), 0,
3346 (destructor
)scanner_dealloc
, /*tp_dealloc*/
3348 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
3352 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
3354 /* create search state object */
3356 ScannerObject
* self
;
3359 Py_ssize_t start
= 0;
3360 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3361 if (!PyArg_ParseTuple(args
, "O|nn:scanner", &string
, &start
, &end
))
3364 /* create scanner object */
3365 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
3369 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
3376 self
->pattern
= (PyObject
*) pattern
;
3378 return (PyObject
*) self
;
3381 static PyMethodDef _functions
[] = {
3382 {"compile", _compile
, METH_VARARGS
},
3383 {"getcodesize", sre_codesize
, METH_NOARGS
},
3384 {"getlower", sre_getlower
, METH_VARARGS
},
3388 #if PY_VERSION_HEX < 0x02030000
3389 DL_EXPORT(void) init_sre(void)
3391 PyMODINIT_FUNC
init_sre(void)
3398 /* Patch object types */
3399 Pattern_Type
.ob_type
= Match_Type
.ob_type
=
3400 Scanner_Type
.ob_type
= &PyType_Type
;
3402 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
3405 d
= PyModule_GetDict(m
);
3407 x
= PyInt_FromLong(SRE_MAGIC
);
3409 PyDict_SetItemString(d
, "MAGIC", x
);
3413 x
= PyInt_FromLong(sizeof(SRE_CODE
));
3415 PyDict_SetItemString(d
, "CODESIZE", x
);
3419 x
= PyString_FromString(copyright
);
3421 PyDict_SetItemString(d
, "copyright", x
);
3426 #endif /* !defined(SRE_RECURSIVE) */