2 * Secret Labs' Regular Expression Engine
4 * regular expression matching engine
7 * 1999-10-24 fl created (based on existing template matcher code)
8 * 2000-03-06 fl first alpha, sort of
9 * 2000-08-01 fl fixes for 1.6b1
10 * 2000-08-07 fl use PyOS_CheckStack() if available
11 * 2000-09-20 fl added expand method
12 * 2001-03-20 fl lots of fixes for 2.1b2
13 * 2001-04-15 fl export copyright as Python attribute, not global
14 * 2001-04-28 fl added __copy__ methods (work in progress)
15 * 2001-05-14 fl fixes for 1.5.2 compatibility
16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19 * 2001-10-21 fl added sub/subn primitive
20 * 2001-10-24 fl added finditer primitive (for 2.2 only)
21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22 * 2002-11-09 fl fixed empty sub/subn return type
23 * 2003-04-18 mvl fully support 4-byte codes
24 * 2003-10-17 gn implemented non recursive scheme
26 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
32 * Portions of this engine have been developed in cooperation with
33 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
34 * other compatibility work.
/* Version/copyright banner for the SRE engine.  The text is never
   modified at run time, so it is kept in read-only storage. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
42 #define PY_SSIZE_T_CLEAN
45 #include "structmember.h" /* offsetof */
51 /* name of this module, minus the leading underscore */
52 #if !defined(SRE_MODULE)
53 #define SRE_MODULE "sre"
56 #define SRE_PY_MODULE "re"
58 /* defining this one enables tracing */
61 #if PY_VERSION_HEX >= 0x01060000
62 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
63 /* defining this enables unicode support (default under 1.6a1 and later) */
68 /* -------------------------------------------------------------------- */
69 /* optional features */
71 /* enables fast searching */
72 #define USE_FAST_SEARCH
74 /* enables aggressive inlining (always on for Visual C) */
77 /* enables copy/deepcopy handling (work in progress) */
78 #undef USE_BUILTIN_COPY
80 #if PY_VERSION_HEX < 0x01060000
81 #define PyObject_DEL(op) PyMem_DEL((op))
84 /* -------------------------------------------------------------------- */
87 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
88 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
89 /* fastest possible local call under MSVC */
90 #define LOCAL(type) static __inline type __fastcall
91 #elif defined(USE_INLINE)
92 #define LOCAL(type) static inline type
94 #define LOCAL(type) static type
98 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
99 #define SRE_ERROR_STATE -2 /* illegal state */
100 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
101 #define SRE_ERROR_MEMORY -9 /* out of memory */
104 #define TRACE(v) printf v
109 /* -------------------------------------------------------------------- */
110 /* search engine state */
112 /* default character predicates (run sre_chars.py to regenerate tables) */
/* Bit flags stored per character in sre_char_info[]. */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Classification flags (an OR of the SRE_*_MASK bits) for the code
   points 0..127.  The table is only ever read, hence const. */
static const char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* Lower-case mapping for the code points 0..127: entries 65..90
   ('A'..'Z') map to 97..122 ('a'..'z'), every other entry maps to
   itself. */
static const char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };

/* Default predicates.  Each yields the masked flag (nonzero when the
   property holds) for ch < 128 and 0 for anything larger.  The argument
   is evaluated more than once, so it must be free of side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Default lower-casing: table lookup for ch < 128, identity for every
   other value. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
}
156 /* locale-specific character predicates */
157 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
158 * warnings when c's type supports only numbers < N+1 */
/* Locale predicates.  Values above 255 are rejected before the
   <ctype.h> call, so those functions only ever receive 0..255. */
#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Locale-aware lower-casing: tolower() for ch < 256, identity for
   every other value. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
170 /* unicode-specific character predicates */
#if defined(HAVE_UNICODE)

/* Unicode predicates: thin wrappers over the Py_UNICODE_* macros.  The
   argument is cast to Py_UNICODE before classification. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Unicode lower-casing via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif /* HAVE_UNICODE */
188 sre_category(SRE_CODE category
, unsigned int ch
)
192 case SRE_CATEGORY_DIGIT
:
193 return SRE_IS_DIGIT(ch
);
194 case SRE_CATEGORY_NOT_DIGIT
:
195 return !SRE_IS_DIGIT(ch
);
196 case SRE_CATEGORY_SPACE
:
197 return SRE_IS_SPACE(ch
);
198 case SRE_CATEGORY_NOT_SPACE
:
199 return !SRE_IS_SPACE(ch
);
200 case SRE_CATEGORY_WORD
:
201 return SRE_IS_WORD(ch
);
202 case SRE_CATEGORY_NOT_WORD
:
203 return !SRE_IS_WORD(ch
);
204 case SRE_CATEGORY_LINEBREAK
:
205 return SRE_IS_LINEBREAK(ch
);
206 case SRE_CATEGORY_NOT_LINEBREAK
:
207 return !SRE_IS_LINEBREAK(ch
);
209 case SRE_CATEGORY_LOC_WORD
:
210 return SRE_LOC_IS_WORD(ch
);
211 case SRE_CATEGORY_LOC_NOT_WORD
:
212 return !SRE_LOC_IS_WORD(ch
);
214 #if defined(HAVE_UNICODE)
215 case SRE_CATEGORY_UNI_DIGIT
:
216 return SRE_UNI_IS_DIGIT(ch
);
217 case SRE_CATEGORY_UNI_NOT_DIGIT
:
218 return !SRE_UNI_IS_DIGIT(ch
);
219 case SRE_CATEGORY_UNI_SPACE
:
220 return SRE_UNI_IS_SPACE(ch
);
221 case SRE_CATEGORY_UNI_NOT_SPACE
:
222 return !SRE_UNI_IS_SPACE(ch
);
223 case SRE_CATEGORY_UNI_WORD
:
224 return SRE_UNI_IS_WORD(ch
);
225 case SRE_CATEGORY_UNI_NOT_WORD
:
226 return !SRE_UNI_IS_WORD(ch
);
227 case SRE_CATEGORY_UNI_LINEBREAK
:
228 return SRE_UNI_IS_LINEBREAK(ch
);
229 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
230 return !SRE_UNI_IS_LINEBREAK(ch
);
232 case SRE_CATEGORY_UNI_DIGIT
:
233 return SRE_IS_DIGIT(ch
);
234 case SRE_CATEGORY_UNI_NOT_DIGIT
:
235 return !SRE_IS_DIGIT(ch
);
236 case SRE_CATEGORY_UNI_SPACE
:
237 return SRE_IS_SPACE(ch
);
238 case SRE_CATEGORY_UNI_NOT_SPACE
:
239 return !SRE_IS_SPACE(ch
);
240 case SRE_CATEGORY_UNI_WORD
:
241 return SRE_LOC_IS_WORD(ch
);
242 case SRE_CATEGORY_UNI_NOT_WORD
:
243 return !SRE_LOC_IS_WORD(ch
);
244 case SRE_CATEGORY_UNI_LINEBREAK
:
245 return SRE_IS_LINEBREAK(ch
);
246 case SRE_CATEGORY_UNI_NOT_LINEBREAK
:
247 return !SRE_IS_LINEBREAK(ch
);
256 data_stack_dealloc(SRE_STATE
* state
)
258 if (state
->data_stack
) {
259 PyMem_FREE(state
->data_stack
);
260 state
->data_stack
= NULL
;
262 state
->data_stack_size
= state
->data_stack_base
= 0;
266 data_stack_grow(SRE_STATE
* state
, Py_ssize_t size
)
268 Py_ssize_t minsize
, cursize
;
269 minsize
= state
->data_stack_base
+size
;
270 cursize
= state
->data_stack_size
;
271 if (cursize
< minsize
) {
273 cursize
= minsize
+minsize
/4+1024;
274 TRACE(("allocate/grow stack %d\n", cursize
));
275 stack
= PyMem_REALLOC(state
->data_stack
, cursize
);
277 data_stack_dealloc(state
);
278 return SRE_ERROR_MEMORY
;
280 state
->data_stack
= (char *)stack
;
281 state
->data_stack_size
= cursize
;
286 /* generate 8-bit version */
288 #define SRE_CHAR unsigned char
289 #define SRE_AT sre_at
290 #define SRE_COUNT sre_count
291 #define SRE_CHARSET sre_charset
292 #define SRE_INFO sre_info
293 #define SRE_MATCH sre_match
294 #define SRE_MATCH_CONTEXT sre_match_context
295 #define SRE_SEARCH sre_search
296 #define SRE_LITERAL_TEMPLATE sre_literal_template
298 #if defined(HAVE_UNICODE)
300 #define SRE_RECURSIVE
304 #undef SRE_LITERAL_TEMPLATE
307 #undef SRE_MATCH_CONTEXT
314 /* generate 16-bit unicode version */
316 #define SRE_CHAR Py_UNICODE
317 #define SRE_AT sre_uat
318 #define SRE_COUNT sre_ucount
319 #define SRE_CHARSET sre_ucharset
320 #define SRE_INFO sre_uinfo
321 #define SRE_MATCH sre_umatch
322 #define SRE_MATCH_CONTEXT sre_umatch_context
323 #define SRE_SEARCH sre_usearch
324 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
327 #endif /* SRE_RECURSIVE */
329 /* -------------------------------------------------------------------- */
330 /* String matching engine */
/* the following section is compiled twice, with different character
   settings (SRE_CHAR and the SRE_* function names defined above) */
336 SRE_AT(SRE_STATE
* state
, SRE_CHAR
* ptr
, SRE_CODE at
)
338 /* check if pointer is at given position */
340 Py_ssize_t thisp
, thatp
;
344 case SRE_AT_BEGINNING
:
345 case SRE_AT_BEGINNING_STRING
:
346 return ((void*) ptr
== state
->beginning
);
348 case SRE_AT_BEGINNING_LINE
:
349 return ((void*) ptr
== state
->beginning
||
350 SRE_IS_LINEBREAK((int) ptr
[-1]));
353 return (((void*) (ptr
+1) == state
->end
&&
354 SRE_IS_LINEBREAK((int) ptr
[0])) ||
355 ((void*) ptr
== state
->end
));
357 case SRE_AT_END_LINE
:
358 return ((void*) ptr
== state
->end
||
359 SRE_IS_LINEBREAK((int) ptr
[0]));
361 case SRE_AT_END_STRING
:
362 return ((void*) ptr
== state
->end
);
364 case SRE_AT_BOUNDARY
:
365 if (state
->beginning
== state
->end
)
367 thatp
= ((void*) ptr
> state
->beginning
) ?
368 SRE_IS_WORD((int) ptr
[-1]) : 0;
369 thisp
= ((void*) ptr
< state
->end
) ?
370 SRE_IS_WORD((int) ptr
[0]) : 0;
371 return thisp
!= thatp
;
373 case SRE_AT_NON_BOUNDARY
:
374 if (state
->beginning
== state
->end
)
376 thatp
= ((void*) ptr
> state
->beginning
) ?
377 SRE_IS_WORD((int) ptr
[-1]) : 0;
378 thisp
= ((void*) ptr
< state
->end
) ?
379 SRE_IS_WORD((int) ptr
[0]) : 0;
380 return thisp
== thatp
;
382 case SRE_AT_LOC_BOUNDARY
:
383 if (state
->beginning
== state
->end
)
385 thatp
= ((void*) ptr
> state
->beginning
) ?
386 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
387 thisp
= ((void*) ptr
< state
->end
) ?
388 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
389 return thisp
!= thatp
;
391 case SRE_AT_LOC_NON_BOUNDARY
:
392 if (state
->beginning
== state
->end
)
394 thatp
= ((void*) ptr
> state
->beginning
) ?
395 SRE_LOC_IS_WORD((int) ptr
[-1]) : 0;
396 thisp
= ((void*) ptr
< state
->end
) ?
397 SRE_LOC_IS_WORD((int) ptr
[0]) : 0;
398 return thisp
== thatp
;
400 #if defined(HAVE_UNICODE)
401 case SRE_AT_UNI_BOUNDARY
:
402 if (state
->beginning
== state
->end
)
404 thatp
= ((void*) ptr
> state
->beginning
) ?
405 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
406 thisp
= ((void*) ptr
< state
->end
) ?
407 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
408 return thisp
!= thatp
;
410 case SRE_AT_UNI_NON_BOUNDARY
:
411 if (state
->beginning
== state
->end
)
413 thatp
= ((void*) ptr
> state
->beginning
) ?
414 SRE_UNI_IS_WORD((int) ptr
[-1]) : 0;
415 thisp
= ((void*) ptr
< state
->end
) ?
416 SRE_UNI_IS_WORD((int) ptr
[0]) : 0;
417 return thisp
== thatp
;
426 SRE_CHARSET(SRE_CODE
* set
, SRE_CODE ch
)
428 /* check if character is a member of the given set */
439 /* <LITERAL> <code> */
445 case SRE_OP_CATEGORY
:
446 /* <CATEGORY> <code> */
447 if (sre_category(set
[0], (int) ch
))
453 if (sizeof(SRE_CODE
) == 2) {
454 /* <CHARSET> <bitmap> (16 bits per code word) */
455 if (ch
< 256 && (set
[ch
>> 4] & (1 << (ch
& 15))))
460 /* <CHARSET> <bitmap> (32 bits per code word) */
461 if (ch
< 256 && (set
[ch
>> 5] & (1 << (ch
& 31))))
468 /* <RANGE> <lower> <upper> */
469 if (set
[0] <= ch
&& ch
<= set
[1])
478 case SRE_OP_BIGCHARSET
:
479 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
481 Py_ssize_t count
, block
;
484 if (sizeof(SRE_CODE
) == 2) {
485 block
= ((unsigned char*)set
)[ch
>> 8];
487 if (set
[block
*16 + ((ch
& 255)>>4)] & (1 << (ch
& 15)))
492 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
493 * warnings when c's type supports only numbers < N+1 */
495 block
= ((unsigned char*)set
)[ch
>> 8];
500 (set
[block
*8 + ((ch
& 255)>>5)] & (1 << (ch
& 31))))
508 /* internal error -- there's not much we can do about it
509 here, so let's just pretend it didn't match... */
515 LOCAL(Py_ssize_t
) SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
);
518 SRE_COUNT(SRE_STATE
* state
, SRE_CODE
* pattern
, Py_ssize_t maxcount
)
521 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->ptr
;
522 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
526 if (maxcount
< end
- ptr
&& maxcount
!= 65535)
527 end
= ptr
+ maxcount
;
529 switch (pattern
[0]) {
533 TRACE(("|%p|%p|COUNT IN\n", pattern
, ptr
));
534 while (ptr
< end
&& SRE_CHARSET(pattern
+ 2, *ptr
))
539 /* repeated dot wildcard. */
540 TRACE(("|%p|%p|COUNT ANY\n", pattern
, ptr
));
541 while (ptr
< end
&& !SRE_IS_LINEBREAK(*ptr
))
546 /* repeated dot wildcard. skip to the end of the target
547 string, and backtrack from there */
548 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern
, ptr
));
553 /* repeated literal */
555 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern
, ptr
, chr
));
556 while (ptr
< end
&& (SRE_CODE
) *ptr
== chr
)
560 case SRE_OP_LITERAL_IGNORE
:
561 /* repeated literal */
563 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
564 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) == chr
)
568 case SRE_OP_NOT_LITERAL
:
569 /* repeated non-literal */
571 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern
, ptr
, chr
));
572 while (ptr
< end
&& (SRE_CODE
) *ptr
!= chr
)
576 case SRE_OP_NOT_LITERAL_IGNORE
:
577 /* repeated non-literal */
579 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern
, ptr
, chr
));
580 while (ptr
< end
&& (SRE_CODE
) state
->lower(*ptr
) != chr
)
585 /* repeated single character pattern */
586 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern
, ptr
));
587 while ((SRE_CHAR
*) state
->ptr
< end
) {
588 i
= SRE_MATCH(state
, pattern
);
594 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
,
595 (SRE_CHAR
*) state
->ptr
- ptr
));
596 return (SRE_CHAR
*) state
->ptr
- ptr
;
599 TRACE(("|%p|%p|COUNT %d\n", pattern
, ptr
, ptr
- (SRE_CHAR
*) state
->ptr
));
600 return ptr
- (SRE_CHAR
*) state
->ptr
;
#if 0 /* not used in this release */
/* Disabled helper, kept for reference only. */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    Py_ssize_t i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
631 /* The macros below should be used to protect recursive SRE_MATCH()
632 * calls that *failed* and do *not* return immediately (IOW, those
633 * that will backtrack). Explaining:
635 * - Recursive SRE_MATCH() returned true: that's usually a success
636 * (besides atypical cases like ASSERT_NOT), therefore there's no
637 * reason to restore lastmark;
639 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
640 * is returning to the caller: If the current SRE_MATCH() is the
641 * top function of the recursion, returning false will be a matching
642 * failure, and it doesn't matter where lastmark is pointing to.
643 * If it's *not* the top function, it will be a recursive SRE_MATCH()
644 * failure by itself, and the calling SRE_MATCH() will have to deal
645 * with the failure by the same rules explained here (it will restore
646 * lastmark by itself if necessary);
648 * - Recursive SRE_MATCH() returned false, and will continue the
649 * outside 'for' loop: must be protected when breaking, since the next
650 * OP could potentially depend on lastmark;
652 * - Recursive SRE_MATCH() returned false, and will be called again
653 * inside a local for/while loop: must be protected between each
654 * loop iteration, since the recursive SRE_MATCH() could do anything,
655 * and could potentially depend on lastmark.
657 * For more information, check the discussion at SF patch #712900.
/* NOTE(review): the macro bodies below appear to have lost lines in this
   copy of the file (e.g. opening/closing wrappers around the bodies);
   confirm against the upstream source before relying on them.  All of
   them are written for use inside SRE_MATCH and reference its locals
   (state, ctx, ctx_pos, alloc_pos, i, ret) by name. */
/* LASTMARK_SAVE / LASTMARK_RESTORE: copy lastmark and lastindex from
   `state` into the current context and back again. */
659 #define LASTMARK_SAVE() \
661 ctx->lastmark = state->lastmark; \
662 ctx->lastindex = state->lastindex; \
664 #define LASTMARK_RESTORE() \
666 state->lastmark = ctx->lastmark; \
667 state->lastindex = ctx->lastindex; \
/* RETURN_*: RETURN_ERROR returns its argument directly; RETURN_FAILURE
   and RETURN_SUCCESS set `ret` to 0 or 1 and jump to the `exit` label.
   The RETURN_ON_* forms do so conditionally, always propagating a
   negative value as an error first. */
670 #define RETURN_ERROR(i) do { return i; } while(0)
671 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
672 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
674 #define RETURN_ON_ERROR(i) \
675 do { if (i < 0) RETURN_ERROR(i); } while (0)
676 #define RETURN_ON_SUCCESS(i) \
677 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
678 #define RETURN_ON_FAILURE(i) \
679 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
/* DATA_STACK_*: primitives on the byte stack at state->data_stack.
   ALLOC reserves sizeof(type) bytes at data_stack_base and points `ptr`
   at them; PUSH/POP copy raw bytes to/from the top with memcpy.  ALLOC
   and PUSH call data_stack_grow() when the stack is too small, return
   its negative result on failure, and re-derive `ctx` from ctx_pos
   afterwards. */
683 #define DATA_STACK_ALLOC(state, type, ptr) \
685 alloc_pos = state->data_stack_base; \
686 TRACE(("allocating %s in %d (%d)\n", \
687 SFY(type), alloc_pos, sizeof(type))); \
688 if (state->data_stack_size < alloc_pos+sizeof(type)) { \
689 int j = data_stack_grow(state, sizeof(type)); \
690 if (j < 0) return j; \
692 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
694 ptr = (type*)(state->data_stack+alloc_pos); \
695 state->data_stack_base += sizeof(type); \
698 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
700 TRACE(("looking up %s at %d\n", SFY(type), pos)); \
701 ptr = (type*)(state->data_stack+pos); \
704 #define DATA_STACK_PUSH(state, data, size) \
706 TRACE(("copy data in %p to %d (%d)\n", \
707 data, state->data_stack_base, size)); \
708 if (state->data_stack_size < state->data_stack_base+size) { \
709 int j = data_stack_grow(state, size); \
710 if (j < 0) return j; \
712 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
714 memcpy(state->data_stack+state->data_stack_base, data, size); \
715 state->data_stack_base += size; \
718 #define DATA_STACK_POP(state, data, size, discard) \
720 TRACE(("copy data to %p from %d (%d)\n", \
721 data, state->data_stack_base-size, size)); \
722 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
724 state->data_stack_base -= size; \
727 #define DATA_STACK_POP_DISCARD(state, size) \
729 TRACE(("discard data from %d (%d)\n", \
730 state->data_stack_base-size, size)); \
731 state->data_stack_base -= size; \
/* DATA_*: shorthands that supply the local `state` and size the
   transfer from the pointed-to object. */
734 #define DATA_PUSH(x) \
735 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
736 #define DATA_POP(x) \
737 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
738 #define DATA_POP_DISCARD(x) \
739 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
740 #define DATA_ALLOC(t,p) \
741 DATA_STACK_ALLOC(state, t, p)
742 #define DATA_LOOKUP_AT(t,p,pos) \
743 DATA_STACK_LOOKUP_AT(state,t,p,pos)
/* MARK_*: save or restore the first lastmark+1 entries of state->mark
   on the data stack; each does nothing unless lastmark > 0. */
745 #define MARK_PUSH(lastmark) \
746 do if (lastmark > 0) { \
747 i = lastmark; /* ctx->lastmark may change if reallocated */ \
748 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
750 #define MARK_POP(lastmark) \
751 do if (lastmark > 0) { \
752 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
754 #define MARK_POP_KEEP(lastmark) \
755 do if (lastmark > 0) { \
756 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
758 #define MARK_POP_DISCARD(lastmark) \
759 do if (lastmark > 0) { \
760 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
/* JUMP_*: identifiers stored in a context's `jump` field by DO_JUMP. */
764 #define JUMP_MAX_UNTIL_1 1
765 #define JUMP_MAX_UNTIL_2 2
766 #define JUMP_MAX_UNTIL_3 3
767 #define JUMP_MIN_UNTIL_1 4
768 #define JUMP_MIN_UNTIL_2 5
769 #define JUMP_MIN_UNTIL_3 6
770 #define JUMP_REPEAT 7
771 #define JUMP_REPEAT_ONE_1 8
772 #define JUMP_REPEAT_ONE_2 9
773 #define JUMP_MIN_REPEAT_ONE 10
774 #define JUMP_BRANCH 11
775 #define JUMP_ASSERT 12
776 #define JUMP_ASSERT_NOT 13
/* DO_JUMP: allocate a new match context on the data stack, record the
   caller's context position, the jump identifier and the sub-pattern in
   it, and make it the current context position. */
778 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
779 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
780 nextctx->last_ctx_pos = ctx_pos; \
781 nextctx->jump = jumpvalue; \
782 nextctx->pattern = nextpattern; \
783 ctx_pos = alloc_pos; \
787 while (0) /* gcc doesn't like labels at end of scopes */ \
790 Py_ssize_t last_ctx_pos
;
796 Py_ssize_t lastindex
;
803 /* check if string matches the given pattern. returns <0 for
804 error, 0 for failure, and 1 for success */
806 SRE_MATCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
808 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
809 Py_ssize_t alloc_pos
, ctx_pos
= -1;
810 Py_ssize_t i
, ret
= 0;
813 SRE_MATCH_CONTEXT
* ctx
;
814 SRE_MATCH_CONTEXT
* nextctx
;
816 TRACE(("|%p|%p|ENTER\n", pattern
, state
->ptr
));
818 DATA_ALLOC(SRE_MATCH_CONTEXT
, ctx
);
819 ctx
->last_ctx_pos
= -1;
820 ctx
->jump
= JUMP_NONE
;
821 ctx
->pattern
= pattern
;
826 ctx
->ptr
= (SRE_CHAR
*)state
->ptr
;
828 if (ctx
->pattern
[0] == SRE_OP_INFO
) {
829 /* optimization info block */
830 /* <INFO> <1=skip> <2=flags> <3=min> ... */
831 if (ctx
->pattern
[3] && (end
- ctx
->ptr
) < ctx
->pattern
[3]) {
832 TRACE(("reject (got %d chars, need %d)\n",
833 (end
- ctx
->ptr
), ctx
->pattern
[3]));
836 ctx
->pattern
+= ctx
->pattern
[1] + 1;
841 switch (*ctx
->pattern
++) {
846 TRACE(("|%p|%p|MARK %d\n", ctx
->pattern
,
847 ctx
->ptr
, ctx
->pattern
[0]));
850 state
->lastindex
= i
/2 + 1;
851 if (i
> state
->lastmark
) {
852 /* state->lastmark is the highest valid index in the
853 state->mark array. If it is increased by more than 1,
854 the intervening marks must be set to NULL to signal
855 that these marks have not been encountered. */
856 Py_ssize_t j
= state
->lastmark
+ 1;
858 state
->mark
[j
++] = NULL
;
861 state
->mark
[i
] = ctx
->ptr
;
866 /* match literal string */
867 /* <LITERAL> <code> */
868 TRACE(("|%p|%p|LITERAL %d\n", ctx
->pattern
,
869 ctx
->ptr
, *ctx
->pattern
));
870 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] != ctx
->pattern
[0])
876 case SRE_OP_NOT_LITERAL
:
877 /* match anything that is not literal character */
878 /* <NOT_LITERAL> <code> */
879 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx
->pattern
,
880 ctx
->ptr
, *ctx
->pattern
));
881 if (ctx
->ptr
>= end
|| (SRE_CODE
) ctx
->ptr
[0] == ctx
->pattern
[0])
889 TRACE(("|%p|%p|SUCCESS\n", ctx
->pattern
, ctx
->ptr
));
890 state
->ptr
= ctx
->ptr
;
894 /* match at given position */
896 TRACE(("|%p|%p|AT %d\n", ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
897 if (!SRE_AT(state
, ctx
->ptr
, *ctx
->pattern
))
902 case SRE_OP_CATEGORY
:
903 /* match at given category */
904 /* <CATEGORY> <code> */
905 TRACE(("|%p|%p|CATEGORY %d\n", ctx
->pattern
,
906 ctx
->ptr
, *ctx
->pattern
));
907 if (ctx
->ptr
>= end
|| !sre_category(ctx
->pattern
[0], ctx
->ptr
[0]))
914 /* match anything (except a newline) */
916 TRACE(("|%p|%p|ANY\n", ctx
->pattern
, ctx
->ptr
));
917 if (ctx
->ptr
>= end
|| SRE_IS_LINEBREAK(ctx
->ptr
[0]))
925 TRACE(("|%p|%p|ANY_ALL\n", ctx
->pattern
, ctx
->ptr
));
932 /* match set member (or non_member) */
933 /* <IN> <skip> <set> */
934 TRACE(("|%p|%p|IN\n", ctx
->pattern
, ctx
->ptr
));
935 if (ctx
->ptr
>= end
|| !SRE_CHARSET(ctx
->pattern
+ 1, *ctx
->ptr
))
937 ctx
->pattern
+= ctx
->pattern
[0];
941 case SRE_OP_LITERAL_IGNORE
:
942 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
943 ctx
->pattern
, ctx
->ptr
, ctx
->pattern
[0]));
944 if (ctx
->ptr
>= end
||
945 state
->lower(*ctx
->ptr
) != state
->lower(*ctx
->pattern
))
951 case SRE_OP_NOT_LITERAL_IGNORE
:
952 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
953 ctx
->pattern
, ctx
->ptr
, *ctx
->pattern
));
954 if (ctx
->ptr
>= end
||
955 state
->lower(*ctx
->ptr
) == state
->lower(*ctx
->pattern
))
961 case SRE_OP_IN_IGNORE
:
962 TRACE(("|%p|%p|IN_IGNORE\n", ctx
->pattern
, ctx
->ptr
));
964 || !SRE_CHARSET(ctx
->pattern
+1,
965 (SRE_CODE
)state
->lower(*ctx
->ptr
)))
967 ctx
->pattern
+= ctx
->pattern
[0];
974 /* <JUMP> <offset> */
975 TRACE(("|%p|%p|JUMP %d\n", ctx
->pattern
,
976 ctx
->ptr
, ctx
->pattern
[0]));
977 ctx
->pattern
+= ctx
->pattern
[0];
982 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
983 TRACE(("|%p|%p|BRANCH\n", ctx
->pattern
, ctx
->ptr
));
985 ctx
->u
.rep
= state
->repeat
;
987 MARK_PUSH(ctx
->lastmark
);
988 for (; ctx
->pattern
[0]; ctx
->pattern
+= ctx
->pattern
[0]) {
989 if (ctx
->pattern
[1] == SRE_OP_LITERAL
&&
991 (SRE_CODE
) *ctx
->ptr
!= ctx
->pattern
[2]))
993 if (ctx
->pattern
[1] == SRE_OP_IN
&&
995 !SRE_CHARSET(ctx
->pattern
+ 3, (SRE_CODE
) *ctx
->ptr
)))
997 state
->ptr
= ctx
->ptr
;
998 DO_JUMP(JUMP_BRANCH
, jump_branch
, ctx
->pattern
+1);
1001 MARK_POP_DISCARD(ctx
->lastmark
);
1002 RETURN_ON_ERROR(ret
);
1006 MARK_POP_KEEP(ctx
->lastmark
);
1010 MARK_POP_DISCARD(ctx
->lastmark
);
1013 case SRE_OP_REPEAT_ONE
:
1014 /* match repeated sequence (maximizing regexp) */
1016 /* this operator only works if the repeated item is
1017 exactly one character wide, and we're not already
1018 collecting backtracking points. for other cases,
1019 use the MAX_REPEAT operator */
1021 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1023 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1024 ctx
->pattern
[1], ctx
->pattern
[2]));
1026 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1027 RETURN_FAILURE
; /* cannot match */
1029 state
->ptr
= ctx
->ptr
;
1031 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[2]);
1032 RETURN_ON_ERROR(ret
);
1033 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1035 ctx
->ptr
+= ctx
->count
;
1037 /* when we arrive here, count contains the number of
1038 matches, and ctx->ptr points to the tail of the target
1039 string. check if the rest of the pattern matches,
1040 and backtrack if not. */
1042 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1045 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1046 /* tail is empty. we're finished */
1047 state
->ptr
= ctx
->ptr
;
1053 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_LITERAL
) {
1054 /* tail starts with a literal. skip positions where
1055 the rest of the pattern cannot possibly match */
1056 ctx
->u
.chr
= ctx
->pattern
[ctx
->pattern
[0]+1];
1058 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1] &&
1059 (ctx
->ptr
>= end
|| *ctx
->ptr
!= ctx
->u
.chr
)) {
1063 if (ctx
->count
< (Py_ssize_t
) ctx
->pattern
[1])
1065 state
->ptr
= ctx
->ptr
;
1066 DO_JUMP(JUMP_REPEAT_ONE_1
, jump_repeat_one_1
,
1067 ctx
->pattern
+ctx
->pattern
[0]);
1069 RETURN_ON_ERROR(ret
);
1081 while (ctx
->count
>= (Py_ssize_t
) ctx
->pattern
[1]) {
1082 state
->ptr
= ctx
->ptr
;
1083 DO_JUMP(JUMP_REPEAT_ONE_2
, jump_repeat_one_2
,
1084 ctx
->pattern
+ctx
->pattern
[0]);
1086 RETURN_ON_ERROR(ret
);
1096 case SRE_OP_MIN_REPEAT_ONE
:
1097 /* match repeated sequence (minimizing regexp) */
1099 /* this operator only works if the repeated item is
1100 exactly one character wide, and we're not already
1101 collecting backtracking points. for other cases,
1102 use the MIN_REPEAT operator */
1104 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1106 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx
->pattern
, ctx
->ptr
,
1107 ctx
->pattern
[1], ctx
->pattern
[2]));
1109 if (ctx
->ptr
+ ctx
->pattern
[1] > end
)
1110 RETURN_FAILURE
; /* cannot match */
1112 state
->ptr
= ctx
->ptr
;
1114 if (ctx
->pattern
[1] == 0)
1117 /* count using pattern min as the maximum */
1118 ret
= SRE_COUNT(state
, ctx
->pattern
+3, ctx
->pattern
[1]);
1119 RETURN_ON_ERROR(ret
);
1120 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1121 if (ret
< (Py_ssize_t
) ctx
->pattern
[1])
1122 /* didn't match minimum number of times */
1124 /* advance past minimum matches of repeat */
1126 ctx
->ptr
+= ctx
->count
;
1129 if (ctx
->pattern
[ctx
->pattern
[0]] == SRE_OP_SUCCESS
) {
1130 /* tail is empty. we're finished */
1131 state
->ptr
= ctx
->ptr
;
1137 while ((Py_ssize_t
)ctx
->pattern
[2] == 65535
1138 || ctx
->count
<= (Py_ssize_t
)ctx
->pattern
[2]) {
1139 state
->ptr
= ctx
->ptr
;
1140 DO_JUMP(JUMP_MIN_REPEAT_ONE
,jump_min_repeat_one
,
1141 ctx
->pattern
+ctx
->pattern
[0]);
1143 RETURN_ON_ERROR(ret
);
1146 state
->ptr
= ctx
->ptr
;
1147 ret
= SRE_COUNT(state
, ctx
->pattern
+3, 1);
1148 RETURN_ON_ERROR(ret
);
1149 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1161 /* create repeat context. all the hard work is done
1162 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1163 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1164 TRACE(("|%p|%p|REPEAT %d %d\n", ctx
->pattern
, ctx
->ptr
,
1165 ctx
->pattern
[1], ctx
->pattern
[2]));
1167 /* install new repeat context */
1168 ctx
->u
.rep
= (SRE_REPEAT
*) PyObject_MALLOC(sizeof(*ctx
->u
.rep
));
1169 ctx
->u
.rep
->count
= -1;
1170 ctx
->u
.rep
->pattern
= ctx
->pattern
;
1171 ctx
->u
.rep
->prev
= state
->repeat
;
1172 ctx
->u
.rep
->last_ptr
= NULL
;
1173 state
->repeat
= ctx
->u
.rep
;
1175 state
->ptr
= ctx
->ptr
;
1176 DO_JUMP(JUMP_REPEAT
, jump_repeat
, ctx
->pattern
+ctx
->pattern
[0]);
1177 state
->repeat
= ctx
->u
.rep
->prev
;
1178 PyObject_FREE(ctx
->u
.rep
);
1181 RETURN_ON_ERROR(ret
);
1186 case SRE_OP_MAX_UNTIL
:
1187 /* maximizing repeat */
1188 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1190 /* FIXME: we probably need to deal with zero-width
1191 matches in here... */
1193 ctx
->u
.rep
= state
->repeat
;
1195 RETURN_ERROR(SRE_ERROR_STATE
);
1197 state
->ptr
= ctx
->ptr
;
1199 ctx
->count
= ctx
->u
.rep
->count
+1;
1201 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx
->pattern
,
1202 ctx
->ptr
, ctx
->count
));
1204 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1205 /* not enough matches */
1206 ctx
->u
.rep
->count
= ctx
->count
;
1207 DO_JUMP(JUMP_MAX_UNTIL_1
, jump_max_until_1
,
1208 ctx
->u
.rep
->pattern
+3);
1210 RETURN_ON_ERROR(ret
);
1213 ctx
->u
.rep
->count
= ctx
->count
-1;
1214 state
->ptr
= ctx
->ptr
;
1218 if ((ctx
->count
< ctx
->u
.rep
->pattern
[2] ||
1219 ctx
->u
.rep
->pattern
[2] == 65535) &&
1220 state
->ptr
!= ctx
->u
.rep
->last_ptr
) {
1221 /* we may have enough matches, but if we can
1222 match another item, do so */
1223 ctx
->u
.rep
->count
= ctx
->count
;
1225 MARK_PUSH(ctx
->lastmark
);
1226 /* zero-width match protection */
1227 DATA_PUSH(&ctx
->u
.rep
->last_ptr
);
1228 ctx
->u
.rep
->last_ptr
= state
->ptr
;
1229 DO_JUMP(JUMP_MAX_UNTIL_2
, jump_max_until_2
,
1230 ctx
->u
.rep
->pattern
+3);
1231 DATA_POP(&ctx
->u
.rep
->last_ptr
);
1233 MARK_POP_DISCARD(ctx
->lastmark
);
1234 RETURN_ON_ERROR(ret
);
1237 MARK_POP(ctx
->lastmark
);
1239 ctx
->u
.rep
->count
= ctx
->count
-1;
1240 state
->ptr
= ctx
->ptr
;
1243 /* cannot match more repeated items here. make sure the
1245 state
->repeat
= ctx
->u
.rep
->prev
;
1246 DO_JUMP(JUMP_MAX_UNTIL_3
, jump_max_until_3
, ctx
->pattern
);
1247 RETURN_ON_SUCCESS(ret
);
1248 state
->repeat
= ctx
->u
.rep
;
1249 state
->ptr
= ctx
->ptr
;
1252 case SRE_OP_MIN_UNTIL
:
1253 /* minimizing repeat */
1254 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1256 ctx
->u
.rep
= state
->repeat
;
1258 RETURN_ERROR(SRE_ERROR_STATE
);
1260 state
->ptr
= ctx
->ptr
;
1262 ctx
->count
= ctx
->u
.rep
->count
+1;
1264 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx
->pattern
,
1265 ctx
->ptr
, ctx
->count
, ctx
->u
.rep
->pattern
));
1267 if (ctx
->count
< ctx
->u
.rep
->pattern
[1]) {
1268 /* not enough matches */
1269 ctx
->u
.rep
->count
= ctx
->count
;
1270 DO_JUMP(JUMP_MIN_UNTIL_1
, jump_min_until_1
,
1271 ctx
->u
.rep
->pattern
+3);
1273 RETURN_ON_ERROR(ret
);
1276 ctx
->u
.rep
->count
= ctx
->count
-1;
1277 state
->ptr
= ctx
->ptr
;
1283 /* see if the tail matches */
1284 state
->repeat
= ctx
->u
.rep
->prev
;
1285 DO_JUMP(JUMP_MIN_UNTIL_2
, jump_min_until_2
, ctx
->pattern
);
1287 RETURN_ON_ERROR(ret
);
1291 state
->repeat
= ctx
->u
.rep
;
1292 state
->ptr
= ctx
->ptr
;
1296 if (ctx
->count
>= ctx
->u
.rep
->pattern
[2]
1297 && ctx
->u
.rep
->pattern
[2] != 65535)
1300 ctx
->u
.rep
->count
= ctx
->count
;
1301 DO_JUMP(JUMP_MIN_UNTIL_3
,jump_min_until_3
,
1302 ctx
->u
.rep
->pattern
+3);
1304 RETURN_ON_ERROR(ret
);
1307 ctx
->u
.rep
->count
= ctx
->count
-1;
1308 state
->ptr
= ctx
->ptr
;
1311 case SRE_OP_GROUPREF
:
1312 /* match backreference */
1313 TRACE(("|%p|%p|GROUPREF %d\n", ctx
->pattern
,
1314 ctx
->ptr
, ctx
->pattern
[0]));
1315 i
= ctx
->pattern
[0];
1317 Py_ssize_t groupref
= i
+i
;
1318 if (groupref
>= state
->lastmark
) {
1321 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1322 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1323 if (!p
|| !e
|| e
< p
)
1326 if (ctx
->ptr
>= end
|| *ctx
->ptr
!= *p
)
1335 case SRE_OP_GROUPREF_IGNORE
:
1336 /* match backreference */
1337 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx
->pattern
,
1338 ctx
->ptr
, ctx
->pattern
[0]));
1339 i
= ctx
->pattern
[0];
1341 Py_ssize_t groupref
= i
+i
;
1342 if (groupref
>= state
->lastmark
) {
1345 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1346 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1347 if (!p
|| !e
|| e
< p
)
1350 if (ctx
->ptr
>= end
||
1351 state
->lower(*ctx
->ptr
) != state
->lower(*p
))
1360 case SRE_OP_GROUPREF_EXISTS
:
1361 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx
->pattern
,
1362 ctx
->ptr
, ctx
->pattern
[0]));
1363 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1364 i
= ctx
->pattern
[0];
1366 Py_ssize_t groupref
= i
+i
;
1367 if (groupref
>= state
->lastmark
) {
1368 ctx
->pattern
+= ctx
->pattern
[1];
1371 SRE_CHAR
* p
= (SRE_CHAR
*) state
->mark
[groupref
];
1372 SRE_CHAR
* e
= (SRE_CHAR
*) state
->mark
[groupref
+1];
1373 if (!p
|| !e
|| e
< p
) {
1374 ctx
->pattern
+= ctx
->pattern
[1];
1383 /* assert subpattern */
1384 /* <ASSERT> <skip> <back> <pattern> */
1385 TRACE(("|%p|%p|ASSERT %d\n", ctx
->pattern
,
1386 ctx
->ptr
, ctx
->pattern
[1]));
1387 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1388 if (state
->ptr
< state
->beginning
)
1390 DO_JUMP(JUMP_ASSERT
, jump_assert
, ctx
->pattern
+2);
1391 RETURN_ON_FAILURE(ret
);
1392 ctx
->pattern
+= ctx
->pattern
[0];
1395 case SRE_OP_ASSERT_NOT
:
1396 /* assert not subpattern */
1397 /* <ASSERT_NOT> <skip> <back> <pattern> */
1398 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx
->pattern
,
1399 ctx
->ptr
, ctx
->pattern
[1]));
1400 state
->ptr
= ctx
->ptr
- ctx
->pattern
[1];
1401 if (state
->ptr
>= state
->beginning
) {
1402 DO_JUMP(JUMP_ASSERT_NOT
, jump_assert_not
, ctx
->pattern
+2);
1404 RETURN_ON_ERROR(ret
);
1408 ctx
->pattern
+= ctx
->pattern
[0];
1411 case SRE_OP_FAILURE
:
1412 /* immediate failure */
1413 TRACE(("|%p|%p|FAILURE\n", ctx
->pattern
, ctx
->ptr
));
1417 TRACE(("|%p|%p|UNKNOWN %d\n", ctx
->pattern
, ctx
->ptr
,
1419 RETURN_ERROR(SRE_ERROR_ILLEGAL
);
1424 ctx_pos
= ctx
->last_ctx_pos
;
1426 DATA_POP_DISCARD(ctx
);
1429 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT
, ctx
, ctx_pos
);
1432 case JUMP_MAX_UNTIL_2
:
1433 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1434 goto jump_max_until_2
;
1435 case JUMP_MAX_UNTIL_3
:
1436 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1437 goto jump_max_until_3
;
1438 case JUMP_MIN_UNTIL_2
:
1439 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx
->pattern
, ctx
->ptr
));
1440 goto jump_min_until_2
;
1441 case JUMP_MIN_UNTIL_3
:
1442 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx
->pattern
, ctx
->ptr
));
1443 goto jump_min_until_3
;
1445 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx
->pattern
, ctx
->ptr
));
1447 case JUMP_MAX_UNTIL_1
:
1448 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1449 goto jump_max_until_1
;
1450 case JUMP_MIN_UNTIL_1
:
1451 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx
->pattern
, ctx
->ptr
));
1452 goto jump_min_until_1
;
1454 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx
->pattern
, ctx
->ptr
));
1456 case JUMP_REPEAT_ONE_1
:
1457 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx
->pattern
, ctx
->ptr
));
1458 goto jump_repeat_one_1
;
1459 case JUMP_REPEAT_ONE_2
:
1460 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx
->pattern
, ctx
->ptr
));
1461 goto jump_repeat_one_2
;
1462 case JUMP_MIN_REPEAT_ONE
:
1463 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx
->pattern
, ctx
->ptr
));
1464 goto jump_min_repeat_one
;
1466 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx
->pattern
, ctx
->ptr
));
1468 case JUMP_ASSERT_NOT
:
1469 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx
->pattern
, ctx
->ptr
));
1470 goto jump_assert_not
;
1472 TRACE(("|%p|%p|RETURN %d\n", ctx
->pattern
, ctx
->ptr
, ret
));
1476 return ret
; /* should never get here */
1480 SRE_SEARCH(SRE_STATE
* state
, SRE_CODE
* pattern
)
1482 SRE_CHAR
* ptr
= (SRE_CHAR
*)state
->start
;
1483 SRE_CHAR
* end
= (SRE_CHAR
*)state
->end
;
1484 Py_ssize_t status
= 0;
1485 Py_ssize_t prefix_len
= 0;
1486 Py_ssize_t prefix_skip
= 0;
1487 SRE_CODE
* prefix
= NULL
;
1488 SRE_CODE
* charset
= NULL
;
1489 SRE_CODE
* overlap
= NULL
;
1492 if (pattern
[0] == SRE_OP_INFO
) {
1493 /* optimization info block */
1494 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1498 if (pattern
[3] > 1) {
1499 /* adjust end point (but make sure we leave at least one
1500 character in there, so literal search will work) */
1501 end
-= pattern
[3]-1;
1506 if (flags
& SRE_INFO_PREFIX
) {
1507 /* pattern starts with a known prefix */
1508 /* <length> <skip> <prefix data> <overlap data> */
1509 prefix_len
= pattern
[5];
1510 prefix_skip
= pattern
[6];
1511 prefix
= pattern
+ 7;
1512 overlap
= prefix
+ prefix_len
- 1;
1513 } else if (flags
& SRE_INFO_CHARSET
)
1514 /* pattern starts with a character from a known set */
1516 charset
= pattern
+ 5;
1518 pattern
+= 1 + pattern
[1];
1521 TRACE(("prefix = %p %d %d\n", prefix
, prefix_len
, prefix_skip
));
1522 TRACE(("charset = %p\n", charset
));
1524 #if defined(USE_FAST_SEARCH)
1525 if (prefix_len
> 1) {
1526 /* pattern starts with a known prefix. use the overlap
1527 table to skip forward as fast as we possibly can */
1529 end
= (SRE_CHAR
*)state
->end
;
1532 if ((SRE_CODE
) ptr
[0] != prefix
[i
]) {
1538 if (++i
== prefix_len
) {
1539 /* found a potential match */
1540 TRACE(("|%p|%p|SEARCH SCAN\n", pattern
, ptr
));
1541 state
->start
= ptr
+ 1 - prefix_len
;
1542 state
->ptr
= ptr
+ 1 - prefix_len
+ prefix_skip
;
1543 if (flags
& SRE_INFO_LITERAL
)
1544 return 1; /* we got all of it */
1545 status
= SRE_MATCH(state
, pattern
+ 2*prefix_skip
);
1548 /* close but no cigar -- try again */
1560 if (pattern
[0] == SRE_OP_LITERAL
) {
1561 /* pattern starts with a literal character. this is used
1562 for short prefixes, and if fast search is disabled */
1563 SRE_CODE chr
= pattern
[1];
1564 end
= (SRE_CHAR
*)state
->end
;
1566 while (ptr
< end
&& (SRE_CODE
) ptr
[0] != chr
)
1570 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern
, ptr
));
1573 if (flags
& SRE_INFO_LITERAL
)
1574 return 1; /* we got all of it */
1575 status
= SRE_MATCH(state
, pattern
+ 2);
1579 } else if (charset
) {
1580 /* pattern starts with a character from a known set */
1581 end
= (SRE_CHAR
*)state
->end
;
1583 while (ptr
< end
&& !SRE_CHARSET(charset
, ptr
[0]))
1587 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern
, ptr
));
1590 status
= SRE_MATCH(state
, pattern
);
1597 while (ptr
<= end
) {
1598 TRACE(("|%p|%p|SEARCH\n", pattern
, ptr
));
1599 state
->start
= state
->ptr
= ptr
++;
1600 status
= SRE_MATCH(state
, pattern
);
1609 SRE_LITERAL_TEMPLATE(SRE_CHAR
* ptr
, Py_ssize_t len
)
1611 /* check if given string is a literal template (i.e. no escapes) */
1618 #if !defined(SRE_RECURSIVE)
1620 /* -------------------------------------------------------------------- */
1621 /* factories and destructors */
1623 /* see sre.h for object declarations */
1624 static PyObject
*pattern_new_match(PatternObject
*, SRE_STATE
*, int);
1625 static PyObject
*pattern_scanner(PatternObject
*, PyObject
*);
1628 sre_codesize(PyObject
* self
, PyObject
*unused
)
1630 return Py_BuildValue("l", sizeof(SRE_CODE
));
1634 sre_getlower(PyObject
* self
, PyObject
* args
)
1636 int character
, flags
;
1637 if (!PyArg_ParseTuple(args
, "ii", &character
, &flags
))
1639 if (flags
& SRE_FLAG_LOCALE
)
1640 return Py_BuildValue("i", sre_lower_locale(character
));
1641 if (flags
& SRE_FLAG_UNICODE
)
1642 #if defined(HAVE_UNICODE)
1643 return Py_BuildValue("i", sre_lower_unicode(character
));
1645 return Py_BuildValue("i", sre_lower_locale(character
));
1647 return Py_BuildValue("i", sre_lower(character
));
1651 state_reset(SRE_STATE
* state
)
1653 /* FIXME: dynamic! */
1654 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1656 state
->lastmark
= -1;
1657 state
->lastindex
= -1;
1659 state
->repeat
= NULL
;
1661 data_stack_dealloc(state
);
1665 getstring(PyObject
* string
, Py_ssize_t
* p_length
, int* p_charsize
)
1667 /* given a python object, return a data pointer, a length (in
1668 characters), and a character size. return NULL if the object
1669 is not a string (or not compatible) */
1671 PyBufferProcs
*buffer
;
1672 Py_ssize_t size
, bytes
;
1676 #if defined(HAVE_UNICODE)
1677 if (PyUnicode_Check(string
)) {
1678 /* unicode strings doesn't always support the buffer interface */
1679 ptr
= (void*) PyUnicode_AS_DATA(string
);
1680 bytes
= PyUnicode_GET_DATA_SIZE(string
);
1681 size
= PyUnicode_GET_SIZE(string
);
1682 charsize
= sizeof(Py_UNICODE
);
1687 /* get pointer to string buffer */
1688 buffer
= string
->ob_type
->tp_as_buffer
;
1689 if (!buffer
|| !buffer
->bf_getreadbuffer
|| !buffer
->bf_getsegcount
||
1690 buffer
->bf_getsegcount(string
, NULL
) != 1) {
1691 PyErr_SetString(PyExc_TypeError
, "expected string or buffer");
1695 /* determine buffer size */
1696 bytes
= buffer
->bf_getreadbuffer(string
, 0, &ptr
);
1698 PyErr_SetString(PyExc_TypeError
, "buffer has negative size");
1702 /* determine character size */
1703 #if PY_VERSION_HEX >= 0x01060000
1704 size
= PyObject_Size(string
);
1706 size
= PyObject_Length(string
);
1709 if (PyString_Check(string
) || bytes
== size
)
1711 #if defined(HAVE_UNICODE)
1712 else if (bytes
== (Py_ssize_t
) (size
* sizeof(Py_UNICODE
)))
1713 charsize
= sizeof(Py_UNICODE
);
1716 PyErr_SetString(PyExc_TypeError
, "buffer size mismatch");
1720 #if defined(HAVE_UNICODE)
1725 *p_charsize
= charsize
;
1731 state_init(SRE_STATE
* state
, PatternObject
* pattern
, PyObject
* string
,
1732 Py_ssize_t start
, Py_ssize_t end
)
1734 /* prepare state object */
1740 memset(state
, 0, sizeof(SRE_STATE
));
1742 state
->lastmark
= -1;
1743 state
->lastindex
= -1;
1745 ptr
= getstring(string
, &length
, &charsize
);
1749 /* adjust boundaries */
1752 else if (start
> length
)
1757 else if (end
> length
)
1760 state
->charsize
= charsize
;
1762 state
->beginning
= ptr
;
1764 state
->start
= (void*) ((char*) ptr
+ start
* state
->charsize
);
1765 state
->end
= (void*) ((char*) ptr
+ end
* state
->charsize
);
1768 state
->string
= string
;
1770 state
->endpos
= end
;
1772 if (pattern
->flags
& SRE_FLAG_LOCALE
)
1773 state
->lower
= sre_lower_locale
;
1774 else if (pattern
->flags
& SRE_FLAG_UNICODE
)
1775 #if defined(HAVE_UNICODE)
1776 state
->lower
= sre_lower_unicode
;
1778 state
->lower
= sre_lower_locale
;
1781 state
->lower
= sre_lower
;
1787 state_fini(SRE_STATE
* state
)
1789 Py_XDECREF(state
->string
);
1790 data_stack_dealloc(state
);
1793 /* calculate offset from start of string */
1794 #define STATE_OFFSET(state, member)\
1795 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1798 state_getslice(SRE_STATE
* state
, Py_ssize_t index
, PyObject
* string
, int empty
)
1802 index
= (index
- 1) * 2;
1804 if (string
== Py_None
|| index
>= state
->lastmark
|| !state
->mark
[index
] || !state
->mark
[index
+1]) {
1806 /* want empty string */
1813 i
= STATE_OFFSET(state
, state
->mark
[index
]);
1814 j
= STATE_OFFSET(state
, state
->mark
[index
+1]);
1817 return PySequence_GetSlice(string
, i
, j
);
1821 pattern_error(int status
)
1824 case SRE_ERROR_RECURSION_LIMIT
:
1827 "maximum recursion limit exceeded"
1830 case SRE_ERROR_MEMORY
:
1834 /* other error codes indicate compiler/engine bugs */
1837 "internal error in regular expression engine"
1843 pattern_dealloc(PatternObject
* self
)
1845 if (self
->weakreflist
!= NULL
)
1846 PyObject_ClearWeakRefs((PyObject
*) self
);
1847 Py_XDECREF(self
->pattern
);
1848 Py_XDECREF(self
->groupindex
);
1849 Py_XDECREF(self
->indexgroup
);
1854 pattern_match(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1860 Py_ssize_t start
= 0;
1861 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1862 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1863 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:match", kwlist
,
1864 &string
, &start
, &end
))
1867 string
= state_init(&state
, self
, string
, start
, end
);
1871 state
.ptr
= state
.start
;
1873 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self
), state
.ptr
));
1875 if (state
.charsize
== 1) {
1876 status
= sre_match(&state
, PatternObject_GetCode(self
));
1878 #if defined(HAVE_UNICODE)
1879 status
= sre_umatch(&state
, PatternObject_GetCode(self
));
1883 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1887 return pattern_new_match(self
, &state
, status
);
1891 pattern_search(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
1897 Py_ssize_t start
= 0;
1898 Py_ssize_t end
= PY_SSIZE_T_MAX
;
1899 static char* kwlist
[] = { "pattern", "pos", "endpos", NULL
};
1900 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:search", kwlist
,
1901 &string
, &start
, &end
))
1904 string
= state_init(&state
, self
, string
, start
, end
);
1908 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self
), state
.ptr
));
1910 if (state
.charsize
== 1) {
1911 status
= sre_search(&state
, PatternObject_GetCode(self
));
1913 #if defined(HAVE_UNICODE)
1914 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
1918 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self
), state
.ptr
));
1922 return pattern_new_match(self
, &state
, status
);
1926 call(char* module
, char* function
, PyObject
* args
)
1935 name
= PyString_FromString(module
);
1938 mod
= PyImport_Import(name
);
1942 func
= PyObject_GetAttrString(mod
, function
);
1946 result
= PyObject_CallObject(func
, args
);
1952 #ifdef USE_BUILTIN_COPY
1954 deepcopy(PyObject
** object
, PyObject
* memo
)
1960 PyTuple_Pack(2, *object
, memo
)
1968 return 1; /* success */
1973 join_list(PyObject
* list
, PyObject
* pattern
)
1975 /* join list elements */
1978 #if PY_VERSION_HEX >= 0x01060000
1984 switch (PyList_GET_SIZE(list
)) {
1987 return PySequence_GetSlice(pattern
, 0, 0);
1989 result
= PyList_GET_ITEM(list
, 0);
1995 /* two or more elements: slice out a suitable separator from the
1996 first member, and use that to join the entire list */
1998 joiner
= PySequence_GetSlice(pattern
, 0, 0);
2002 #if PY_VERSION_HEX >= 0x01060000
2003 function
= PyObject_GetAttrString(joiner
, "join");
2008 args
= PyTuple_New(1);
2010 Py_DECREF(function
);
2014 PyTuple_SET_ITEM(args
, 0, list
);
2015 result
= PyObject_CallObject(function
, args
);
2016 Py_DECREF(args
); /* also removes list */
2017 Py_DECREF(function
);
2021 PyTuple_Pack(2, list
, joiner
)
2030 pattern_findall(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2038 Py_ssize_t start
= 0;
2039 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2040 static char* kwlist
[] = { "source", "pos", "endpos", NULL
};
2041 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|nn:findall", kwlist
,
2042 &string
, &start
, &end
))
2045 string
= state_init(&state
, self
, string
, start
, end
);
2049 list
= PyList_New(0);
2055 while (state
.start
<= state
.end
) {
2059 state_reset(&state
);
2061 state
.ptr
= state
.start
;
2063 if (state
.charsize
== 1) {
2064 status
= sre_search(&state
, PatternObject_GetCode(self
));
2066 #if defined(HAVE_UNICODE)
2067 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2074 pattern_error(status
);
2078 /* don't bother to build a match object */
2079 switch (self
->groups
) {
2081 b
= STATE_OFFSET(&state
, state
.start
);
2082 e
= STATE_OFFSET(&state
, state
.ptr
);
2083 item
= PySequence_GetSlice(string
, b
, e
);
2088 item
= state_getslice(&state
, 1, string
, 1);
2093 item
= PyTuple_New(self
->groups
);
2096 for (i
= 0; i
< self
->groups
; i
++) {
2097 PyObject
* o
= state_getslice(&state
, i
+1, string
, 1);
2102 PyTuple_SET_ITEM(item
, i
, o
);
2107 status
= PyList_Append(list
, item
);
2112 if (state
.ptr
== state
.start
)
2113 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2115 state
.start
= state
.ptr
;
2129 #if PY_VERSION_HEX >= 0x02020000
2131 pattern_finditer(PatternObject
* pattern
, PyObject
* args
)
2137 scanner
= pattern_scanner(pattern
, args
);
2141 search
= PyObject_GetAttrString(scanner
, "search");
2146 iterator
= PyCallIter_New(search
, Py_None
);
2154 pattern_split(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2165 Py_ssize_t maxsplit
= 0;
2166 static char* kwlist
[] = { "source", "maxsplit", NULL
};
2167 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "O|n:split", kwlist
,
2168 &string
, &maxsplit
))
2171 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2175 list
= PyList_New(0);
2184 while (!maxsplit
|| n
< maxsplit
) {
2186 state_reset(&state
);
2188 state
.ptr
= state
.start
;
2190 if (state
.charsize
== 1) {
2191 status
= sre_search(&state
, PatternObject_GetCode(self
));
2193 #if defined(HAVE_UNICODE)
2194 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2201 pattern_error(status
);
2205 if (state
.start
== state
.ptr
) {
2206 if (last
== state
.end
)
2208 /* skip one character */
2209 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2213 /* get segment before this match */
2214 item
= PySequence_GetSlice(
2215 string
, STATE_OFFSET(&state
, last
),
2216 STATE_OFFSET(&state
, state
.start
)
2220 status
= PyList_Append(list
, item
);
2225 /* add groups (if any) */
2226 for (i
= 0; i
< self
->groups
; i
++) {
2227 item
= state_getslice(&state
, i
+1, string
, 0);
2230 status
= PyList_Append(list
, item
);
2238 last
= state
.start
= state
.ptr
;
2242 /* get segment following last match (even if empty) */
2243 item
= PySequence_GetSlice(
2244 string
, STATE_OFFSET(&state
, last
), state
.endpos
2248 status
= PyList_Append(list
, item
);
2264 pattern_subx(PatternObject
* self
, PyObject
* ptemplate
, PyObject
* string
,
2265 Py_ssize_t count
, Py_ssize_t subn
)
2278 int filter_is_callable
;
2280 if (PyCallable_Check(ptemplate
)) {
2281 /* sub/subn takes either a function or a template */
2284 filter_is_callable
= 1;
2286 /* if not callable, check if it's a literal string */
2288 ptr
= getstring(ptemplate
, &n
, &bint
);
2292 literal
= sre_literal_template((unsigned char *)ptr
, n
);
2294 #if defined(HAVE_UNICODE)
2295 literal
= sre_uliteral_template((Py_UNICODE
*)ptr
, n
);
2305 filter_is_callable
= 0;
2307 /* not a literal; hand it over to the template compiler */
2309 SRE_PY_MODULE
, "_subx",
2310 PyTuple_Pack(2, self
, ptemplate
)
2314 filter_is_callable
= PyCallable_Check(filter
);
2318 string
= state_init(&state
, self
, string
, 0, PY_SSIZE_T_MAX
);
2324 list
= PyList_New(0);
2333 while (!count
|| n
< count
) {
2335 state_reset(&state
);
2337 state
.ptr
= state
.start
;
2339 if (state
.charsize
== 1) {
2340 status
= sre_search(&state
, PatternObject_GetCode(self
));
2342 #if defined(HAVE_UNICODE)
2343 status
= sre_usearch(&state
, PatternObject_GetCode(self
));
2350 pattern_error(status
);
2354 b
= STATE_OFFSET(&state
, state
.start
);
2355 e
= STATE_OFFSET(&state
, state
.ptr
);
2358 /* get segment before this match */
2359 item
= PySequence_GetSlice(string
, i
, b
);
2362 status
= PyList_Append(list
, item
);
2367 } else if (i
== b
&& i
== e
&& n
> 0)
2368 /* ignore empty match on latest position */
2371 if (filter_is_callable
) {
2372 /* pass match object through filter */
2373 match
= pattern_new_match(self
, &state
, 1);
2376 args
= PyTuple_Pack(1, match
);
2381 item
= PyObject_CallObject(filter
, args
);
2387 /* filter is literal string */
2393 if (item
!= Py_None
) {
2394 status
= PyList_Append(list
, item
);
2405 if (state
.ptr
== state
.start
)
2406 state
.start
= (void*) ((char*) state
.ptr
+ state
.charsize
);
2408 state
.start
= state
.ptr
;
2412 /* get segment following last match */
2413 if (i
< state
.endpos
) {
2414 item
= PySequence_GetSlice(string
, i
, state
.endpos
);
2417 status
= PyList_Append(list
, item
);
2427 /* convert list to single string (also removes list) */
2428 item
= join_list(list
, self
->pattern
);
2434 return Py_BuildValue("Ni", item
, n
);
2447 pattern_sub(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2449 PyObject
* ptemplate
;
2451 Py_ssize_t count
= 0;
2452 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2453 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:sub", kwlist
,
2454 &ptemplate
, &string
, &count
))
2457 return pattern_subx(self
, ptemplate
, string
, count
, 0);
2461 pattern_subn(PatternObject
* self
, PyObject
* args
, PyObject
* kw
)
2463 PyObject
* ptemplate
;
2465 Py_ssize_t count
= 0;
2466 static char* kwlist
[] = { "repl", "string", "count", NULL
};
2467 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "OO|n:subn", kwlist
,
2468 &ptemplate
, &string
, &count
))
2471 return pattern_subx(self
, ptemplate
, string
, count
, 1);
2475 pattern_copy(PatternObject
* self
, PyObject
*unused
)
2477 #ifdef USE_BUILTIN_COPY
2478 PatternObject
* copy
;
2481 copy
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, self
->codesize
);
2485 offset
= offsetof(PatternObject
, groups
);
2487 Py_XINCREF(self
->groupindex
);
2488 Py_XINCREF(self
->indexgroup
);
2489 Py_XINCREF(self
->pattern
);
2491 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
2492 sizeof(PatternObject
) + self
->codesize
* sizeof(SRE_CODE
) - offset
);
2493 copy
->weakreflist
= NULL
;
2495 return (PyObject
*) copy
;
2497 PyErr_SetString(PyExc_TypeError
, "cannot copy this pattern object");
2503 pattern_deepcopy(PatternObject
* self
, PyObject
* memo
)
2505 #ifdef USE_BUILTIN_COPY
2506 PatternObject
* copy
;
2508 copy
= (PatternObject
*) pattern_copy(self
);
2512 if (!deepcopy(©
->groupindex
, memo
) ||
2513 !deepcopy(©
->indexgroup
, memo
) ||
2514 !deepcopy(©
->pattern
, memo
)) {
2520 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this pattern object");
2525 PyDoc_STRVAR(pattern_match_doc
,
2526 "match(string[, pos[, endpos]]) --> match object or None.\n\
2527 Matches zero or more characters at the beginning of the string");
2529 PyDoc_STRVAR(pattern_search_doc
,
2530 "search(string[, pos[, endpos]]) --> match object or None.\n\
2531 Scan through string looking for a match, and return a corresponding\n\
2532 MatchObject instance. Return None if no position in the string matches.");
2534 PyDoc_STRVAR(pattern_split_doc
,
2535 "split(string[, maxsplit = 0]) --> list.\n\
2536 Split string by the occurrences of pattern.");
2538 PyDoc_STRVAR(pattern_findall_doc
,
2539 "findall(string[, pos[, endpos]]) --> list.\n\
2540 Return a list of all non-overlapping matches of pattern in string.");
2542 PyDoc_STRVAR(pattern_finditer_doc
,
2543 "finditer(string[, pos[, endpos]]) --> iterator.\n\
2544 Return an iterator over all non-overlapping matches for the \n\
2545 RE pattern in string. For each match, the iterator returns a\n\
2548 PyDoc_STRVAR(pattern_sub_doc
,
2549 "sub(repl, string[, count = 0]) --> newstring\n\
2550 Return the string obtained by replacing the leftmost non-overlapping\n\
2551 occurrences of pattern in string by the replacement repl.");
2553 PyDoc_STRVAR(pattern_subn_doc
,
2554 "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2555 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2556 the leftmost non-overlapping occurrences of pattern with the\n\
2557 replacement repl.");
2559 PyDoc_STRVAR(pattern_doc
, "Compiled regular expression objects");
2561 static PyMethodDef pattern_methods
[] = {
2562 {"match", (PyCFunction
) pattern_match
, METH_VARARGS
|METH_KEYWORDS
,
2564 {"search", (PyCFunction
) pattern_search
, METH_VARARGS
|METH_KEYWORDS
,
2565 pattern_search_doc
},
2566 {"sub", (PyCFunction
) pattern_sub
, METH_VARARGS
|METH_KEYWORDS
,
2568 {"subn", (PyCFunction
) pattern_subn
, METH_VARARGS
|METH_KEYWORDS
,
2570 {"split", (PyCFunction
) pattern_split
, METH_VARARGS
|METH_KEYWORDS
,
2572 {"findall", (PyCFunction
) pattern_findall
, METH_VARARGS
|METH_KEYWORDS
,
2573 pattern_findall_doc
},
2574 #if PY_VERSION_HEX >= 0x02020000
2575 {"finditer", (PyCFunction
) pattern_finditer
, METH_VARARGS
,
2576 pattern_finditer_doc
},
2578 {"scanner", (PyCFunction
) pattern_scanner
, METH_VARARGS
},
2579 {"__copy__", (PyCFunction
) pattern_copy
, METH_NOARGS
},
2580 {"__deepcopy__", (PyCFunction
) pattern_deepcopy
, METH_O
},
2585 pattern_getattr(PatternObject
* self
, char* name
)
2589 res
= Py_FindMethod(pattern_methods
, (PyObject
*) self
, name
);
2597 if (!strcmp(name
, "pattern")) {
2598 Py_INCREF(self
->pattern
);
2599 return self
->pattern
;
2602 if (!strcmp(name
, "flags"))
2603 return Py_BuildValue("i", self
->flags
);
2605 if (!strcmp(name
, "groups"))
2606 return Py_BuildValue("i", self
->groups
);
2608 if (!strcmp(name
, "groupindex") && self
->groupindex
) {
2609 Py_INCREF(self
->groupindex
);
2610 return self
->groupindex
;
2613 PyErr_SetString(PyExc_AttributeError
, name
);
2617 statichere PyTypeObject Pattern_Type
= {
2618 PyObject_HEAD_INIT(NULL
)
2619 0, "_" SRE_MODULE
".SRE_Pattern",
2620 sizeof(PatternObject
), sizeof(SRE_CODE
),
2621 (destructor
)pattern_dealloc
, /*tp_dealloc*/
2623 (getattrfunc
)pattern_getattr
, /*tp_getattr*/
2627 0, /* tp_as_number */
2628 0, /* tp_as_sequence */
2629 0, /* tp_as_mapping */
2633 0, /* tp_getattro */
2634 0, /* tp_setattro */
2635 0, /* tp_as_buffer */
2636 Py_TPFLAGS_HAVE_WEAKREFS
, /* tp_flags */
2637 pattern_doc
, /* tp_doc */
2638 0, /* tp_traverse */
2640 0, /* tp_richcompare */
2641 offsetof(PatternObject
, weakreflist
), /* tp_weaklistoffset */
2645 _compile(PyObject
* self_
, PyObject
* args
)
2647 /* "compile" pattern descriptor to pattern object */
2649 PatternObject
* self
;
2655 Py_ssize_t groups
= 0;
2656 PyObject
* groupindex
= NULL
;
2657 PyObject
* indexgroup
= NULL
;
2658 if (!PyArg_ParseTuple(args
, "OiO!|nOO", &pattern
, &flags
,
2659 &PyList_Type
, &code
, &groups
,
2660 &groupindex
, &indexgroup
))
2663 n
= PyList_GET_SIZE(code
);
2665 self
= PyObject_NEW_VAR(PatternObject
, &Pattern_Type
, n
);
2671 for (i
= 0; i
< n
; i
++) {
2672 PyObject
*o
= PyList_GET_ITEM(code
, i
);
2673 unsigned long value
= PyInt_Check(o
) ? (unsigned long)PyInt_AsLong(o
)
2674 : PyLong_AsUnsignedLong(o
);
2675 self
->code
[i
] = (SRE_CODE
) value
;
2676 if ((unsigned long) self
->code
[i
] != value
) {
2677 PyErr_SetString(PyExc_OverflowError
,
2678 "regular expression code size limit exceeded");
2683 if (PyErr_Occurred()) {
2689 self
->pattern
= pattern
;
2691 self
->flags
= flags
;
2693 self
->groups
= groups
;
2695 Py_XINCREF(groupindex
);
2696 self
->groupindex
= groupindex
;
2698 Py_XINCREF(indexgroup
);
2699 self
->indexgroup
= indexgroup
;
2701 self
->weakreflist
= NULL
;
2703 return (PyObject
*) self
;
2706 /* -------------------------------------------------------------------- */
2710 match_dealloc(MatchObject
* self
)
2712 Py_XDECREF(self
->regs
);
2713 Py_XDECREF(self
->string
);
2714 Py_DECREF(self
->pattern
);
2719 match_getslice_by_index(MatchObject
* self
, Py_ssize_t index
, PyObject
* def
)
2721 if (index
< 0 || index
>= self
->groups
) {
2722 /* raise IndexError if we were given a bad group number */
2732 if (self
->string
== Py_None
|| self
->mark
[index
] < 0) {
2733 /* return default value if the string or group is undefined */
2738 return PySequence_GetSlice(
2739 self
->string
, self
->mark
[index
], self
->mark
[index
+1]
2744 match_getindex(MatchObject
* self
, PyObject
* index
)
2748 if (PyInt_Check(index
))
2749 return PyInt_AsSsize_t(index
);
2753 if (self
->pattern
->groupindex
) {
2754 index
= PyObject_GetItem(self
->pattern
->groupindex
, index
);
2756 if (PyInt_Check(index
) || PyLong_Check(index
))
2757 i
= PyInt_AsSsize_t(index
);
2767 match_getslice(MatchObject
* self
, PyObject
* index
, PyObject
* def
)
2769 return match_getslice_by_index(self
, match_getindex(self
, index
), def
);
2773 match_expand(MatchObject
* self
, PyObject
* ptemplate
)
2775 /* delegate to Python code */
2777 SRE_PY_MODULE
, "_expand",
2778 PyTuple_Pack(3, self
->pattern
, self
, ptemplate
)
2783 match_group(MatchObject
* self
, PyObject
* args
)
2788 size
= PyTuple_GET_SIZE(args
);
2792 result
= match_getslice(self
, Py_False
, Py_None
);
2795 result
= match_getslice(self
, PyTuple_GET_ITEM(args
, 0), Py_None
);
2798 /* fetch multiple items */
2799 result
= PyTuple_New(size
);
2802 for (i
= 0; i
< size
; i
++) {
2803 PyObject
* item
= match_getslice(
2804 self
, PyTuple_GET_ITEM(args
, i
), Py_None
2810 PyTuple_SET_ITEM(result
, i
, item
);
2818 match_groups(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2823 PyObject
* def
= Py_None
;
2824 static char* kwlist
[] = { "default", NULL
};
2825 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groups", kwlist
, &def
))
2828 result
= PyTuple_New(self
->groups
-1);
2832 for (index
= 1; index
< self
->groups
; index
++) {
2834 item
= match_getslice_by_index(self
, index
, def
);
2839 PyTuple_SET_ITEM(result
, index
-1, item
);
2846 match_groupdict(MatchObject
* self
, PyObject
* args
, PyObject
* kw
)
2852 PyObject
* def
= Py_None
;
2853 static char* kwlist
[] = { "default", NULL
};
2854 if (!PyArg_ParseTupleAndKeywords(args
, kw
, "|O:groupdict", kwlist
, &def
))
2857 result
= PyDict_New();
2858 if (!result
|| !self
->pattern
->groupindex
)
2861 keys
= PyMapping_Keys(self
->pattern
->groupindex
);
2865 for (index
= 0; index
< PyList_GET_SIZE(keys
); index
++) {
2869 key
= PyList_GET_ITEM(keys
, index
);
2872 value
= match_getslice(self
, key
, def
);
2877 status
= PyDict_SetItem(result
, key
, value
);
2894 match_start(MatchObject
* self
, PyObject
* args
)
2898 PyObject
* index_
= Py_False
; /* zero */
2899 if (!PyArg_UnpackTuple(args
, "start", 0, 1, &index_
))
2902 index
= match_getindex(self
, index_
);
2904 if (index
< 0 || index
>= self
->groups
) {
2912 /* mark is -1 if group is undefined */
2913 return Py_BuildValue("i", self
->mark
[index
*2]);
2917 match_end(MatchObject
* self
, PyObject
* args
)
2921 PyObject
* index_
= Py_False
; /* zero */
2922 if (!PyArg_UnpackTuple(args
, "end", 0, 1, &index_
))
2925 index
= match_getindex(self
, index_
);
2927 if (index
< 0 || index
>= self
->groups
) {
2935 /* mark is -1 if group is undefined */
2936 return Py_BuildValue("i", self
->mark
[index
*2+1]);
2940 _pair(Py_ssize_t i1
, Py_ssize_t i2
)
2945 pair
= PyTuple_New(2);
2949 item
= PyInt_FromSsize_t(i1
);
2952 PyTuple_SET_ITEM(pair
, 0, item
);
2954 item
= PyInt_FromSsize_t(i2
);
2957 PyTuple_SET_ITEM(pair
, 1, item
);
2967 match_span(MatchObject
* self
, PyObject
* args
)
2971 PyObject
* index_
= Py_False
; /* zero */
2972 if (!PyArg_UnpackTuple(args
, "span", 0, 1, &index_
))
2975 index
= match_getindex(self
, index_
);
2977 if (index
< 0 || index
>= self
->groups
) {
2985 /* marks are -1 if group is undefined */
2986 return _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
2990 match_regs(MatchObject
* self
)
2996 regs
= PyTuple_New(self
->groups
);
3000 for (index
= 0; index
< self
->groups
; index
++) {
3001 item
= _pair(self
->mark
[index
*2], self
->mark
[index
*2+1]);
3006 PyTuple_SET_ITEM(regs
, index
, item
);
3016 match_copy(MatchObject
* self
, PyObject
*unused
)
3018 #ifdef USE_BUILTIN_COPY
3020 Py_ssize_t slots
, offset
;
3022 slots
= 2 * (self
->pattern
->groups
+1);
3024 copy
= PyObject_NEW_VAR(MatchObject
, &Match_Type
, slots
);
3028 /* this value a constant, but any compiler should be able to
3029 figure that out all by itself */
3030 offset
= offsetof(MatchObject
, string
);
3032 Py_XINCREF(self
->pattern
);
3033 Py_XINCREF(self
->string
);
3034 Py_XINCREF(self
->regs
);
3036 memcpy((char*) copy
+ offset
, (char*) self
+ offset
,
3037 sizeof(MatchObject
) + slots
* sizeof(Py_ssize_t
) - offset
);
3039 return (PyObject
*) copy
;
3041 PyErr_SetString(PyExc_TypeError
, "cannot copy this match object");
3047 match_deepcopy(MatchObject
* self
, PyObject
* memo
)
3049 #ifdef USE_BUILTIN_COPY
3052 copy
= (MatchObject
*) match_copy(self
);
3056 if (!deepcopy((PyObject
**) ©
->pattern
, memo
) ||
3057 !deepcopy(©
->string
, memo
) ||
3058 !deepcopy(©
->regs
, memo
)) {
3064 PyErr_SetString(PyExc_TypeError
, "cannot deepcopy this match object");
3069 static PyMethodDef match_methods
[] = {
3070 {"group", (PyCFunction
) match_group
, METH_VARARGS
},
3071 {"start", (PyCFunction
) match_start
, METH_VARARGS
},
3072 {"end", (PyCFunction
) match_end
, METH_VARARGS
},
3073 {"span", (PyCFunction
) match_span
, METH_VARARGS
},
3074 {"groups", (PyCFunction
) match_groups
, METH_VARARGS
|METH_KEYWORDS
},
3075 {"groupdict", (PyCFunction
) match_groupdict
, METH_VARARGS
|METH_KEYWORDS
},
3076 {"expand", (PyCFunction
) match_expand
, METH_O
},
3077 {"__copy__", (PyCFunction
) match_copy
, METH_NOARGS
},
3078 {"__deepcopy__", (PyCFunction
) match_deepcopy
, METH_O
},
3083 match_getattr(MatchObject
* self
, char* name
)
3087 res
= Py_FindMethod(match_methods
, (PyObject
*) self
, name
);
3093 if (!strcmp(name
, "lastindex")) {
3094 if (self
->lastindex
>= 0)
3095 return Py_BuildValue("i", self
->lastindex
);
3100 if (!strcmp(name
, "lastgroup")) {
3101 if (self
->pattern
->indexgroup
&& self
->lastindex
>= 0) {
3102 PyObject
* result
= PySequence_GetItem(
3103 self
->pattern
->indexgroup
, self
->lastindex
3113 if (!strcmp(name
, "string")) {
3115 Py_INCREF(self
->string
);
3116 return self
->string
;
3123 if (!strcmp(name
, "regs")) {
3125 Py_INCREF(self
->regs
);
3128 return match_regs(self
);
3131 if (!strcmp(name
, "re")) {
3132 Py_INCREF(self
->pattern
);
3133 return (PyObject
*) self
->pattern
;
3136 if (!strcmp(name
, "pos"))
3137 return Py_BuildValue("i", self
->pos
);
3139 if (!strcmp(name
, "endpos"))
3140 return Py_BuildValue("i", self
->endpos
);
3142 PyErr_SetString(PyExc_AttributeError
, name
);
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
3149 statichere PyTypeObject Match_Type
= {
3150 PyObject_HEAD_INIT(NULL
)
3151 0, "_" SRE_MODULE
".SRE_Match",
3152 sizeof(MatchObject
), sizeof(Py_ssize_t
),
3153 (destructor
)match_dealloc
, /*tp_dealloc*/
3155 (getattrfunc
)match_getattr
/*tp_getattr*/
3159 pattern_new_match(PatternObject
* pattern
, SRE_STATE
* state
, int status
)
3161 /* create match object (from state object) */
3170 /* create match object (with room for extra group marks) */
3171 match
= PyObject_NEW_VAR(MatchObject
, &Match_Type
,
3172 2*(pattern
->groups
+1));
3177 match
->pattern
= pattern
;
3179 Py_INCREF(state
->string
);
3180 match
->string
= state
->string
;
3183 match
->groups
= pattern
->groups
+1;
3185 /* fill in group slices */
3187 base
= (char*) state
->beginning
;
3188 n
= state
->charsize
;
3190 match
->mark
[0] = ((char*) state
->start
- base
) / n
;
3191 match
->mark
[1] = ((char*) state
->ptr
- base
) / n
;
3193 for (i
= j
= 0; i
< pattern
->groups
; i
++, j
+=2)
3194 if (j
+1 <= state
->lastmark
&& state
->mark
[j
] && state
->mark
[j
+1]) {
3195 match
->mark
[j
+2] = ((char*) state
->mark
[j
] - base
) / n
;
3196 match
->mark
[j
+3] = ((char*) state
->mark
[j
+1] - base
) / n
;
3198 match
->mark
[j
+2] = match
->mark
[j
+3] = -1; /* undefined */
3200 match
->pos
= state
->pos
;
3201 match
->endpos
= state
->endpos
;
3203 match
->lastindex
= state
->lastindex
;
3205 return (PyObject
*) match
;
3207 } else if (status
== 0) {
3215 /* internal error */
3216 pattern_error(status
);
3221 /* -------------------------------------------------------------------- */
3222 /* scanner methods (experimental) */
3225 scanner_dealloc(ScannerObject
* self
)
3227 state_fini(&self
->state
);
3228 Py_DECREF(self
->pattern
);
3233 scanner_match(ScannerObject
* self
, PyObject
*unused
)
3235 SRE_STATE
* state
= &self
->state
;
3241 state
->ptr
= state
->start
;
3243 if (state
->charsize
== 1) {
3244 status
= sre_match(state
, PatternObject_GetCode(self
->pattern
));
3246 #if defined(HAVE_UNICODE)
3247 status
= sre_umatch(state
, PatternObject_GetCode(self
->pattern
));
3251 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3254 if (status
== 0 || state
->ptr
== state
->start
)
3255 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3257 state
->start
= state
->ptr
;
3264 scanner_search(ScannerObject
* self
, PyObject
*unused
)
3266 SRE_STATE
* state
= &self
->state
;
3272 state
->ptr
= state
->start
;
3274 if (state
->charsize
== 1) {
3275 status
= sre_search(state
, PatternObject_GetCode(self
->pattern
));
3277 #if defined(HAVE_UNICODE)
3278 status
= sre_usearch(state
, PatternObject_GetCode(self
->pattern
));
3282 match
= pattern_new_match((PatternObject
*) self
->pattern
,
3285 if (status
== 0 || state
->ptr
== state
->start
)
3286 state
->start
= (void*) ((char*) state
->ptr
+ state
->charsize
);
3288 state
->start
= state
->ptr
;
3293 static PyMethodDef scanner_methods
[] = {
3294 {"match", (PyCFunction
) scanner_match
, METH_NOARGS
},
3295 {"search", (PyCFunction
) scanner_search
, METH_NOARGS
},
3300 scanner_getattr(ScannerObject
* self
, char* name
)
3304 res
= Py_FindMethod(scanner_methods
, (PyObject
*) self
, name
);
3311 if (!strcmp(name
, "pattern")) {
3312 Py_INCREF(self
->pattern
);
3313 return self
->pattern
;
3316 PyErr_SetString(PyExc_AttributeError
, name
);
3320 statichere PyTypeObject Scanner_Type
= {
3321 PyObject_HEAD_INIT(NULL
)
3322 0, "_" SRE_MODULE
".SRE_Scanner",
3323 sizeof(ScannerObject
), 0,
3324 (destructor
)scanner_dealloc
, /*tp_dealloc*/
3326 (getattrfunc
)scanner_getattr
, /*tp_getattr*/
3330 pattern_scanner(PatternObject
* pattern
, PyObject
* args
)
3332 /* create search state object */
3334 ScannerObject
* self
;
3337 Py_ssize_t start
= 0;
3338 Py_ssize_t end
= PY_SSIZE_T_MAX
;
3339 if (!PyArg_ParseTuple(args
, "O|nn:scanner", &string
, &start
, &end
))
3342 /* create scanner object */
3343 self
= PyObject_NEW(ScannerObject
, &Scanner_Type
);
3347 string
= state_init(&self
->state
, pattern
, string
, start
, end
);
3354 self
->pattern
= (PyObject
*) pattern
;
3356 return (PyObject
*) self
;
3359 static PyMethodDef _functions
[] = {
3360 {"compile", _compile
, METH_VARARGS
},
3361 {"getcodesize", sre_codesize
, METH_NOARGS
},
3362 {"getlower", sre_getlower
, METH_VARARGS
},
3366 #if PY_VERSION_HEX < 0x02030000
3367 DL_EXPORT(void) init_sre(void)
3369 PyMODINIT_FUNC
init_sre(void)
3376 /* Patch object types */
3377 Pattern_Type
.ob_type
= Match_Type
.ob_type
=
3378 Scanner_Type
.ob_type
= &PyType_Type
;
3380 m
= Py_InitModule("_" SRE_MODULE
, _functions
);
3383 d
= PyModule_GetDict(m
);
3385 x
= PyInt_FromLong(SRE_MAGIC
);
3387 PyDict_SetItemString(d
, "MAGIC", x
);
3391 x
= PyInt_FromLong(sizeof(SRE_CODE
));
3393 PyDict_SetItemString(d
, "CODESIZE", x
);
3397 x
= PyString_FromString(copyright
);
3399 PyDict_SetItemString(d
, "copyright", x
);
3404 #endif /* !defined(SRE_RECURSIVE) */