Refactoring in order to cache more operation.
[xy_vsfilter.git] / include / atl / atlrx.h
blob1f2b07a1f0923e6b7bc7c0c5bae536a6ce30536e
1 // This is a part of the Active Template Library.
2 // Copyright (C) Microsoft Corporation
3 // All rights reserved.
4 //
5 // This source code is only intended as a supplement to the
6 // Active Template Library Reference and related
7 // electronic documentation provided with the library.
8 // See these sources for detailed information regarding the
9 // Active Template Library product.
11 #ifndef __ATLRX_H__
12 #define __ATLRX_H__
14 #pragma once
16 #include <atlbase.h>
17 #include <atlcoll.h>
18 #include <mbstring.h>
20 #ifndef ATL_REGEXP_MIN_STACK
21 #define ATL_REGEXP_MIN_STACK 256
22 #endif
24 /*
25 Regular Expression Grammar
27 R - top level grammar rule
28 RE - regular expression
29 AltE - Alternative expression
30 E - expression
31 SE - simple expression
33 R -> RE
34 '^'RE (matches beginning of string)
36 RE -> AltE RE
37 AltE
40 AltE -> E
41 E '|' AltE
42 E -> SE (RepeatOp '?'?)?
43 SE -> Arg
44 Group
45 CharClass
46 '\'Abbrev (see below)
47 '\'EscapedChar (any character including reserved symbols)
48 '\'Digit+ (Arg back reference)
49 '!' (not)
50 '.' (any char)
51 '$' (end of input)
52 Symbol (any non-reserved character)
53 Arg -> '{'RE'}'
54 Group -> '('RE')'
55 CharClass -> '[' '^'? CharSet ']'
56 CharSet -> CharItem+
57 CharItem -> Char('-'Char)?
58 RepeatOp -> '*'
59 '+'
60 '?'
61 Abbrev -> Abbreviation defined in CAtlRECharTraits
62 Abbrev Expansion Meaning
63 a ([a-zA-Z0-9]) alpha numeric
64 b ([ \\t]) white space (blank)
65 c ([a-zA-Z]) alpha
66 d ([0-9]) digit
67 h ([0-9a-fA-F]) hex digit
68 n (\r|(\r?\n)) newline
69 q (\"[^\"]*\")|(\'[^\']*\') quoted string
70 w ([a-zA-Z]+) simple word
71 z ([0-9]+) integer
74 #pragma pack(push,_ATL_PACKING)
75 namespace ATL {
77 //Conversion utility classes used to convert char* to RECHAR.
78 //Used by rx debugging printing.
// Generic adapter from a narrow string to a RECHARTYPE string.
// The primary template performs no conversion: it keeps the pointer it was
// constructed with and returns that same pointer from the conversion operator.
template <typename RECHARTYPE=char>
class CAToREChar
{
public:
    // The wrapped string; it is referenced, not copied.
    const char* m_psz;

    CAToREChar(const char* psz) throw()
        : m_psz(psz)
    {
    }

    // Hands back the pointer supplied at construction.
    operator const RECHARTYPE*() const throw()
    {
        return m_psz;
    }
};
91 template<>
92 class CAToREChar<wchar_t>
94 public:
95 CAToREChar(const char* psz) throw()
96 : m_a2w(psz)
99 operator const wchar_t*() const throw() { return (wchar_t*)m_a2w; }
101 private:
102 CA2W m_a2w;
105 class CAtlRECharTraitsA
107 public:
108 typedef char RECHARTYPE;
110 static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw()
112 #ifndef ATL_NO_CHECK_BIT_FIELD
113 ATLASSERT(UseBitFieldForRange());
114 #endif
115 return static_cast<size_t>(static_cast<unsigned char>(*sz));
117 static RECHARTYPE *Next(const RECHARTYPE *sz) throw()
119 return (RECHARTYPE *) (sz+1);
122 static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
124 return strncmp(szLeft, szRight, nCount);
127 static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
129 return _strnicmp(szLeft, szRight, nCount);
132 _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsA::Strlwr must be passed a buffer size.")
133 static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw()
135 #pragma warning (push)
136 #pragma warning(disable : 4996)
137 return _strlwr(sz);
138 #pragma warning (pop)
141 static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw()
143 Checked::strlwr_s(sz, nSize);
144 return sz;
147 static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw()
149 return strtol(sz, szEnd, nBase);
152 static int Isdigit(RECHARTYPE ch) throw()
154 return isdigit(static_cast<unsigned char>(ch));
157 static const RECHARTYPE** GetAbbrevs()
159 static const RECHARTYPE *s_szAbbrevs[] =
161 "a([a-zA-Z0-9])", // alpha numeric
162 "b([ \\t])", // white space (blank)
163 "c([a-zA-Z])", // alpha
164 "d([0-9])", // digit
165 "h([0-9a-fA-F])", // hex digit
166 "n(\r|(\r?\n))", // newline
167 "q(\"[^\"]*\")|(\'[^\']*\')", // quoted string
168 "w([a-zA-Z]+)", // simple word
169 "z([0-9]+)", // integer
170 NULL
173 return s_szAbbrevs;
176 static BOOL UseBitFieldForRange() throw()
178 return TRUE;
181 static int ByteLen(const RECHARTYPE *sz) throw()
183 return int(strlen(sz));
187 class CAtlRECharTraitsW
189 public:
190 typedef WCHAR RECHARTYPE;
192 static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw()
194 #ifndef ATL_NO_CHECK_BIT_FIELD
195 ATLASSERT(UseBitFieldForRange());
196 #endif
197 return static_cast<size_t>(*sz);
199 static RECHARTYPE *Next(const RECHARTYPE *sz) throw()
201 return (RECHARTYPE *) (sz+1);
204 static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
206 return wcsncmp(szLeft, szRight, nCount);
209 static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
211 return _wcsnicmp(szLeft, szRight, nCount);
214 _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsW::Strlwr must be passed a buffer size.")
215 static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw()
217 #pragma warning (push)
218 #pragma warning(disable : 4996)
219 return _wcslwr(sz);
220 #pragma warning (pop)
223 static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw()
225 Checked::wcslwr_s(sz, nSize);
226 return sz;
229 static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw()
231 return wcstol(sz, szEnd, nBase);
234 static int Isdigit(RECHARTYPE ch) throw()
236 return iswdigit(ch);
239 static const RECHARTYPE** GetAbbrevs()
241 static const RECHARTYPE *s_szAbbrevs[] =
243 L"a([a-zA-Z0-9])", // alpha numeric
244 L"b([ \\t])", // white space (blank)
245 L"c([a-zA-Z])", // alpha
246 L"d([0-9])", // digit
247 L"h([0-9a-fA-F])", // hex digit
248 L"n(\r|(\r?\n))", // newline
249 L"q(\"[^\"]*\")|(\'[^\']*\')", // quoted string
250 L"w([a-zA-Z]+)", // simple word
251 L"z([0-9]+)", // integer
252 NULL
255 return s_szAbbrevs;
258 static BOOL UseBitFieldForRange() throw()
260 return FALSE;
263 static int ByteLen(const RECHARTYPE *sz) throw()
265 return int(wcslen(sz)*sizeof(WCHAR));
269 class CAtlRECharTraitsMB
271 public:
272 typedef unsigned char RECHARTYPE;
274 static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw()
276 #ifndef ATL_NO_CHECK_BIT_FIELD
277 ATLASSERT(UseBitFieldForRange());
278 #endif
280 return static_cast<size_t>(*sz);
283 static RECHARTYPE *Next(const RECHARTYPE *sz) throw()
285 return _mbsinc(sz);
288 static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
290 return _mbsncmp(szLeft, szRight, nCount);
293 static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
295 return _mbsnicmp(szLeft, szRight, nCount);
298 _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsMB::Strlwr must be passed a buffer size.")
299 static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw()
301 #pragma warning (push)
302 #pragma warning(disable : 4996)
303 return _mbslwr(sz);
304 #pragma warning (pop)
307 static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw()
309 Checked::mbslwr_s(sz, nSize);
310 return sz;
313 static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw()
315 return strtol((const char *) sz, (char **) szEnd, nBase);
318 static int Isdigit(RECHARTYPE ch) throw()
320 return _ismbcdigit((unsigned int) ch);
323 static const RECHARTYPE** GetAbbrevs()
325 return reinterpret_cast<const RECHARTYPE **>(CAtlRECharTraitsA::GetAbbrevs());
328 static BOOL UseBitFieldForRange() throw()
330 return FALSE;
333 static int ByteLen(const RECHARTYPE *sz) throw()
335 return (int)strlen((const char *) sz);
339 #ifndef _UNICODE
340 typedef CAtlRECharTraitsA CAtlRECharTraits;
341 #else // _UNICODE
342 typedef CAtlRECharTraitsW CAtlRECharTraits;
343 #endif // !_UNICODE
344 // Note: If you want to use CAtlRECharTraitsMB you must pass it in
345 // as a template argument
347 template <class CharTraits=CAtlRECharTraits>
348 class CAtlRegExp; // forward declaration
350 template <class CharTraits=CAtlRECharTraits>
351 class CAtlREMatchContext
353 public:
354 friend CAtlRegExp<CharTraits>;
355 typedef typename CharTraits::RECHARTYPE RECHAR;
357 struct MatchGroup
359 const RECHAR *szStart;
360 const RECHAR *szEnd;
363 UINT m_uNumGroups;
365 MatchGroup m_Match;
367 void GetMatch(UINT nIndex, const RECHAR **szStart, const RECHAR **szEnd)
369 ATLENSURE(szStart != NULL);
370 ATLENSURE(szEnd != NULL);
371 ATLENSURE(nIndex >=0 && nIndex < m_uNumGroups);
372 *szStart = m_Matches[nIndex].szStart;
373 *szEnd = m_Matches[nIndex].szEnd;
376 void GetMatch(UINT nIndex, MatchGroup *pGroup)
379 ATLENSURE(pGroup != NULL);
380 ATLENSURE(nIndex >=0&&(static_cast<UINT>(nIndex))< m_uNumGroups);
381 pGroup->szStart = m_Matches[nIndex].szStart;
382 pGroup->szEnd = m_Matches[nIndex].szEnd;
385 protected:
386 CAutoVectorPtr<void *> m_Mem;
387 CAutoVectorPtr<MatchGroup> m_Matches;
388 CAtlArray<void *> m_stack;
389 size_t m_nTos;
391 public:
392 CAtlREMatchContext(size_t nInitStackSize=ATL_REGEXP_MIN_STACK)
394 m_uNumGroups = 0;
395 m_nTos = 0;
396 m_stack.SetCount(nInitStackSize);
397 m_Match.szStart = NULL;
398 m_Match.szEnd = NULL;
401 protected:
402 BOOL Initialize(UINT uRequiredMem, UINT uNumGroups) throw()
404 m_nTos = 0;
406 m_uNumGroups = 0;
407 m_Matches.Free();
409 if (!m_Matches.Allocate(uNumGroups))
410 return FALSE;
412 m_uNumGroups = uNumGroups;
414 m_Mem.Free();
416 if (!m_Mem.Allocate(uRequiredMem))
417 return FALSE;
419 memset(m_Mem.m_p, 0x00, uRequiredMem*sizeof(void *));
421 memset(m_Matches, 0x00, m_uNumGroups * sizeof(MatchGroup));
422 return TRUE;
425 BOOL Push(void *p)
427 m_nTos++;
428 if (m_stack.GetCount() <= (UINT) m_nTos)
430 if (!m_stack.SetCount((m_nTos+1)*2))
432 m_nTos--;
433 return FALSE;
436 m_stack[m_nTos] = p;
437 return TRUE;
440 BOOL Push(size_t n)
442 return Push((void *) n);
445 void *Pop() throw()
447 if (m_nTos==0)
449 // stack underflow
450 // this should never happen at match time.
451 // (the parsing succeeded when it shouldn't have)
452 ATLASSERT(FALSE);
453 return NULL;
455 void *p = m_stack[m_nTos];
456 m_nTos--;
457 return p;
// Result codes reported by CAtlRegExp::Parse.
enum REParseError {
    REPARSE_ERROR_OK               = 0,  // No error occurred
    REPARSE_ERROR_OUTOFMEMORY      = 1,  // Out of memory
    REPARSE_ERROR_BRACE_EXPECTED   = 2,  // A closing brace was expected
    REPARSE_ERROR_PAREN_EXPECTED   = 3,  // A closing parenthesis was expected
    REPARSE_ERROR_BRACKET_EXPECTED = 4,  // A closing bracket was expected
    REPARSE_ERROR_UNEXPECTED       = 5,  // An unspecified fatal error occurred
    REPARSE_ERROR_EMPTY_RANGE      = 6,  // A range expression was empty
    REPARSE_ERROR_INVALID_GROUP    = 7,  // A backreference was made to a group
                                         // that did not exist
    REPARSE_ERROR_INVALID_RANGE    = 8,  // An invalid range was specified
    REPARSE_ERROR_EMPTY_REPEATOP   = 9,  // A possibly empty * or + was detected
    REPARSE_ERROR_INVALID_INPUT    = 10, // The input string was invalid
};
476 template <class CharTraits /* =CAtlRECharTraits */>
477 class CAtlRegExp
479 public:
480 CAtlRegExp() throw()
482 m_uNumGroups = 0;
483 m_uRequiredMem = 0;
484 m_bCaseSensitive = TRUE;
485 m_LastError = REPARSE_ERROR_OK;
488 typedef typename CharTraits::RECHARTYPE RECHAR;
490 // CAtlRegExp::Parse
491 // Parses the regular expression
492 // returns REPARSE_ERROR_OK if successful, an REParseError otherwise
493 REParseError Parse(const RECHAR *szRE, BOOL bCaseSensitive=TRUE)
495 ATLASSERT(szRE);
496 if (!szRE)
497 return REPARSE_ERROR_INVALID_INPUT;
499 Reset();
501 m_bCaseSensitive = bCaseSensitive;
503 const RECHAR *szInput = szRE;
505 if (!bCaseSensitive)
507 // copy the string
508 int nSize = CharTraits::ByteLen(szRE)+sizeof(RECHAR);
509 szInput = (const RECHAR *) malloc(nSize);
510 if (!szInput)
511 return REPARSE_ERROR_OUTOFMEMORY;
513 Checked::memcpy_s((char *) szInput, nSize, szRE, nSize);
515 CharTraits::Strlwr(const_cast<RECHAR *>(szInput), nSize/sizeof(RECHAR));
517 const RECHAR *sz = szInput;
519 int nCall = AddInstruction(RE_CALL);
520 if (nCall < 0)
521 return REPARSE_ERROR_OUTOFMEMORY;
523 if (*sz == '^')
525 if (AddInstruction(RE_FAIL) < 0)
526 return REPARSE_ERROR_OUTOFMEMORY;
527 sz++;
529 else
531 if (AddInstruction(RE_ADVANCE) < 0)
532 return REPARSE_ERROR_OUTOFMEMORY;
535 bool bEmpty = true;
536 ParseRE(&sz, bEmpty);
537 if (!GetLastParseError())
539 GetInstruction(nCall).call.nTarget = 2;
541 if (AddInstruction(RE_MATCH) < 0)
542 return REPARSE_ERROR_OUTOFMEMORY;
545 if (szInput != szRE)
546 free((void *) szInput);
548 return GetLastParseError();
551 BOOL Match(const RECHAR *szIn, CAtlREMatchContext<CharTraits> *pContext, const RECHAR **ppszEnd=NULL)
553 ATLASSERT(szIn);
554 ATLASSERT(pContext);
556 if (!szIn || !pContext)
557 return FALSE;
559 if (ppszEnd)
560 *ppszEnd = NULL;
562 const RECHAR *szInput = szIn;
564 if (!m_bCaseSensitive)
566 int nSize = CharTraits::ByteLen(szIn)+sizeof(RECHAR);
567 szInput = (const RECHAR *) malloc(nSize);
568 if (!szInput)
569 return FALSE;
571 Checked::memcpy_s((char *) szInput, nSize, szIn, nSize);
572 CharTraits::Strlwr(const_cast<RECHAR *>(szInput), nSize/sizeof(RECHAR));
575 if (!pContext->Initialize(m_uRequiredMem, m_uNumGroups))
577 if (szInput != szIn)
578 free((void *) szInput);
579 return FALSE;
582 size_t ip = 0;
584 const RECHAR *sz = szInput;
585 const RECHAR *szCurrInput = szInput;
587 #pragma warning(push)
588 #pragma warning(disable:4127) // conditional expression is constant
590 while (1)
592 #ifdef ATLRX_DEBUG
593 OnDebugEvent(ip, szInput, sz, pContext);
594 #endif
595 if (ip == 0)
596 pContext->m_Match.szStart = sz;
598 switch (GetInstruction(ip).type)
600 case RE_NOP:
601 ip++;
602 break;
604 case RE_SYMBOL:
605 if (GetInstruction(ip).symbol.nSymbol == static_cast<size_t>(static_cast<_TUCHAR>(*sz)))
607 sz = CharTraits::Next(sz);
608 ip++;
610 else
612 ip = (size_t) pContext->Pop();
614 break;
616 case RE_ANY:
617 if (*sz)
619 sz = CharTraits::Next(sz);
620 ip++;
622 else
624 ip = (size_t) pContext->Pop();
626 break;
628 case RE_GROUP_START:
629 pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart = sz;
630 ip++;
631 break;
633 case RE_GROUP_END:
634 pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd = sz;
635 ip++;
636 break;
638 case RE_PUSH_CHARPOS:
639 pContext->Push((void *) sz);
640 ip++;
641 break;
643 case RE_POP_CHARPOS:
644 sz = (RECHAR *) pContext->Pop();
645 ip++;
646 break;
648 case RE_CALL:
649 pContext->Push(ip+1);
650 ip = GetInstruction(ip).call.nTarget;
651 break;
653 case RE_JMP:
654 ip = GetInstruction(ip).jmp.nTarget;
655 break;
657 case RE_RETURN:
658 ip = (size_t) pContext->Pop();
659 break;
661 case RE_PUSH_MEMORY:
662 pContext->Push((void *) (pContext->m_Mem[GetInstruction(ip).memory.nIndex]));
663 ip++;
664 break;
666 case RE_POP_MEMORY:
667 pContext->m_Mem[GetInstruction(ip).memory.nIndex] = pContext->Pop();
668 ip++;
669 break;
671 case RE_STORE_CHARPOS:
672 pContext->m_Mem[GetInstruction(ip).memory.nIndex] = (void *) sz;
673 ip++;
674 break;
676 case RE_GET_CHARPOS:
677 sz = (RECHAR *) pContext->m_Mem[GetInstruction(ip).memory.nIndex];
678 ip++;
679 break;
681 case RE_STORE_STACKPOS:
682 pContext->m_Mem[GetInstruction(ip).memory.nIndex] = (void *) pContext->m_nTos;
683 ip++;
684 break;
686 case RE_GET_STACKPOS:
687 pContext->m_nTos = (size_t) pContext->m_Mem[GetInstruction(ip).memory.nIndex];
688 ip++;
689 break;
691 case RE_RET_NOMATCH:
692 if (sz == (RECHAR *) pContext->m_Mem[GetInstruction(ip).memory.nIndex])
694 // do a return
695 ip = (size_t) pContext->Pop();
697 else
698 ip++;
699 break;
701 case RE_ADVANCE:
702 sz = CharTraits::Next(szCurrInput);
703 szCurrInput = sz;
704 if (*sz == '\0')
705 goto Error;
706 ip = 0;
707 pContext->m_nTos = 0;
708 break;
710 case RE_FAIL:
711 goto Error;
713 case RE_RANGE:
715 if (*sz == '\0')
717 ip = (size_t) pContext->Pop();
718 break;
721 RECHAR *pBits = reinterpret_cast<RECHAR *>((&m_Instructions[ip]+1));
722 size_t u = CharTraits::GetBitFieldForRangeArrayIndex(sz);
723 if (pBits[u >> 3] & 1 << (u & 0x7))
725 ip += InstructionsPerRangeBitField();
726 ip++;
727 sz = CharTraits::Next(sz);
729 else
731 ip = (size_t) pContext->Pop();
734 break;
736 case RE_NOTRANGE:
738 if (*sz == '\0')
740 ip = (size_t) pContext->Pop();
741 break;
744 RECHAR *pBits = reinterpret_cast<RECHAR *>((&m_Instructions[ip]+1));
745 size_t u = static_cast<size_t>(static_cast<_TUCHAR>(* ((RECHAR *) sz)));
746 if (pBits[u >> 3] & 1 << (u & 0x7))
748 ip = (size_t) pContext->Pop();
750 else
752 ip += InstructionsPerRangeBitField();
753 ip++;
754 sz = CharTraits::Next(sz);
757 break;
759 case RE_RANGE_EX:
761 if (*sz == '\0')
763 ip = (size_t) pContext->Pop();
764 break;
767 BOOL bMatch = FALSE;
768 size_t inEnd = GetInstruction(ip).range.nTarget;
769 ip++;
771 while (ip < inEnd)
773 if (static_cast<size_t>(static_cast<_TUCHAR>(*sz)) >= GetInstruction(ip).memory.nIndex &&
774 static_cast<size_t>(static_cast<_TUCHAR>(*sz)) <= GetInstruction(ip+1).memory.nIndex)
776 // if we match, we jump to the end
777 sz = CharTraits::Next(sz);
778 ip = inEnd;
779 bMatch = TRUE;
781 else
783 ip += 2;
786 if (!bMatch)
788 ip = (size_t) pContext->Pop();
791 break;
793 case RE_NOTRANGE_EX:
795 if (*sz == '\0')
797 ip = (size_t) pContext->Pop();
798 break;
801 BOOL bMatch = TRUE;
802 size_t inEnd = GetInstruction(ip).range.nTarget;
803 ip++;
805 while (ip < inEnd)
807 if (static_cast<size_t>(static_cast<_TUCHAR>(*sz)) >= GetInstruction(ip).memory.nIndex &&
808 static_cast<size_t>(static_cast<_TUCHAR>(*sz)) <= GetInstruction(ip+1).memory.nIndex)
810 ip = (size_t) pContext->Pop();
811 bMatch = FALSE;
812 break;
814 else
816 // if we match, we jump to the end
817 ip += 2;
820 if (bMatch)
821 sz = CharTraits::Next(sz);
823 break;
825 case RE_PREVIOUS:
827 BOOL bMatch = FALSE;
828 if (m_bCaseSensitive)
830 bMatch = !CharTraits::Strncmp(sz, pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart,
831 pContext->m_Matches[GetInstruction(ip).prev.nGroup].szEnd-pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart);
833 else
835 bMatch = !CharTraits::Strnicmp(sz, pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart,
836 pContext->m_Matches[GetInstruction(ip).prev.nGroup].szEnd-pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart);
838 if (bMatch)
840 sz += pContext->m_Matches[GetInstruction(ip).prev.nGroup].szEnd-pContext->m_Matches[GetInstruction(ip).prev.nGroup].szStart;
841 ip++;
842 break;
844 ip = (size_t) pContext->Pop();
846 break;
848 case RE_MATCH:
849 pContext->m_Match.szEnd = sz;
850 if (!m_bCaseSensitive)
851 FixupMatchContext(pContext, szIn, szInput);
852 if (ppszEnd)
853 *ppszEnd = szIn + (sz - szInput);
854 if (szInput != szIn)
855 free((void *) szInput);
856 return TRUE;
857 break;
859 case RE_PUSH_GROUP:
860 pContext->Push((void *) pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart);
861 pContext->Push((void *) pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd);
862 ip++;
863 break;
865 case RE_POP_GROUP:
866 pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd = (const RECHAR *) pContext->Pop();
867 pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart = (const RECHAR *) pContext->Pop();
868 ip++;
869 break;
871 default:
872 ATLASSERT(FALSE);
873 break;
877 #pragma warning(pop) // 4127
879 ATLASSERT(FALSE);
880 Error:
881 pContext->m_Match.szEnd = sz;
882 if (!m_bCaseSensitive)
883 FixupMatchContext(pContext, szIn, szInput);
884 if (ppszEnd)
885 *ppszEnd = szIn + (sz - szInput);
886 if (szInput != szIn)
887 free((void *) szInput);
888 return FALSE;
891 protected:
892 REParseError m_LastError;
894 REParseError GetLastParseError() throw()
896 return m_LastError;
899 void SetLastParseError(REParseError Error) throw()
901 m_LastError = Error;
903 // CAtlRegExp::Reset
904 // Removes all instructions to allow reparsing into the same instance
905 void Reset() throw()
907 m_Instructions.RemoveAll();
908 m_uRequiredMem = 0;
909 m_bCaseSensitive = TRUE;
910 m_uNumGroups = 0;
911 SetLastParseError(REPARSE_ERROR_OK);
// Opcodes of the compiled program. The enumerators keep their original
// order, so their numeric values are unchanged.
enum REInstructionType {
    RE_NOP = 0,

    // group span bookkeeping
    RE_GROUP_START,
    RE_GROUP_END,

    // character tests
    RE_SYMBOL,
    RE_ANY,
    RE_RANGE,
    RE_NOTRANGE,
    RE_RANGE_EX,
    RE_NOTRANGE_EX,

    // repeat-operator tags
    RE_PLUS,
    RE_NG_PLUS,
    RE_QUESTION,
    RE_NG_QUESTION,

    // control flow and input-position stack
    RE_JMP,
    RE_PUSH_CHARPOS,
    RE_POP_CHARPOS,
    RE_CALL,
    RE_RETURN,
    RE_STAR_BEGIN,
    RE_NG_STAR_BEGIN,

    // scratch memory slots
    RE_PUSH_MEMORY,
    RE_POP_MEMORY,
    RE_STORE_CHARPOS,
    RE_STORE_STACKPOS,
    RE_GET_CHARPOS,
    RE_GET_STACKPOS,
    RE_RET_NOMATCH,

    // back reference
    RE_PREVIOUS,

    // program prologue / epilogue
    RE_FAIL,
    RE_ADVANCE,
    RE_MATCH,

    // save / restore a group span on the stack
    RE_PUSH_GROUP,
    RE_POP_GROUP,
};
951 struct INSTRUCTION_SYMBOL
953 size_t nSymbol;
956 struct INSTRUCTION_JMP
958 size_t nTarget;
961 struct INSTRUCTION_GROUP
963 size_t nGroup;
966 struct INSTRUCTION_CALL
968 size_t nTarget;
971 struct INSTRUCTION_MEMORY
973 size_t nIndex;
976 struct INSTRUCTION_PREVIOUS
978 size_t nGroup;
981 struct INSTRUCTION_RANGE_EX
983 size_t nTarget;
986 struct INSTRUCTION
988 REInstructionType type;
989 union
991 INSTRUCTION_SYMBOL symbol;
992 INSTRUCTION_JMP jmp;
993 INSTRUCTION_GROUP group;
994 INSTRUCTION_CALL call;
995 INSTRUCTION_MEMORY memory;
996 INSTRUCTION_PREVIOUS prev;
997 INSTRUCTION_RANGE_EX range;
1001 inline int InstructionsPerRangeBitField() throw()
1003 return (256/8) / sizeof(INSTRUCTION) + (((256/8) % sizeof(INSTRUCTION)) ? 1 : 0);
1006 CAtlArray<INSTRUCTION> m_Instructions;
1008 UINT m_uNumGroups;
1009 UINT m_uRequiredMem;
1010 BOOL m_bCaseSensitive;
1013 // class used internally to restore
1014 // parsing state when unwinding
1015 class CParseState
1017 public:
1018 int m_nNumInstructions;
1019 UINT m_uNumGroups;
1020 UINT m_uRequiredMem;
1022 CParseState(CAtlRegExp *pRegExp) throw()
1024 m_nNumInstructions = (int) pRegExp->m_Instructions.GetCount();
1025 m_uNumGroups = pRegExp->m_uNumGroups;
1026 m_uRequiredMem = pRegExp->m_uRequiredMem;
1029 void Restore(CAtlRegExp *pRegExp)
1031 pRegExp->m_Instructions.SetCount(m_nNumInstructions);
1032 pRegExp->m_uNumGroups = m_uNumGroups;
1033 pRegExp->m_uRequiredMem = m_uRequiredMem;
1037 int AddInstruction(REInstructionType type)
1039 if (!m_Instructions.SetCount(m_Instructions.GetCount()+1))
1041 SetLastParseError(REPARSE_ERROR_OUTOFMEMORY);
1042 return -1;
1045 m_Instructions[m_Instructions.GetCount()-1].type = type;
1046 return (int) m_Instructions.GetCount()-1;
1049 BOOL PeekToken(const RECHAR **ppszRE, int ch) throw()
1051 if (**ppszRE != ch)
1052 return FALSE;
1053 return TRUE;
1056 BOOL MatchToken(const RECHAR **ppszRE, int ch) throw()
1058 if (!PeekToken(ppszRE, ch))
1059 return FALSE;
1060 *ppszRE = CharTraits::Next(*ppszRE);
1061 return TRUE;
1064 INSTRUCTION &GetInstruction(size_t nIndex) throw()
1066 return m_Instructions[nIndex];
1069 // ParseArg: parse grammar rule Arg
1070 int ParseArg(const RECHAR **ppszRE, bool &bEmpty)
1072 int nPushGroup = AddInstruction(RE_PUSH_GROUP);
1073 if (nPushGroup < 0)
1074 return -1;
1076 GetInstruction(nPushGroup).group.nGroup = m_uNumGroups;
1078 int p = AddInstruction(RE_GROUP_START);
1079 if (p < 0)
1080 return -1;
1081 GetInstruction(p).group.nGroup = m_uNumGroups++;
1083 int nCall = AddInstruction(RE_CALL);
1084 if (nCall < 0)
1085 return -1;
1087 int nPopGroup = AddInstruction(RE_POP_GROUP);
1088 if (nPopGroup < 0)
1089 return -1;
1090 GetInstruction(nPopGroup).group.nGroup = GetInstruction(nPushGroup).group.nGroup;
1092 if (AddInstruction(RE_RETURN) < 0)
1093 return -1;
1095 int nAlt = ParseRE(ppszRE, bEmpty);
1096 if (nAlt < 0)
1098 if (GetLastParseError())
1099 return -1;
1101 if (!PeekToken(ppszRE, '}'))
1103 SetLastParseError(REPARSE_ERROR_BRACE_EXPECTED);
1104 return -1;
1107 // in the case of an empty group, we add a nop
1108 nAlt = AddInstruction(RE_NOP);
1109 if (nAlt < 0)
1110 return -1;
1113 GetInstruction(nCall).call.nTarget = nAlt;
1115 if (!MatchToken(ppszRE, '}'))
1117 SetLastParseError(REPARSE_ERROR_BRACE_EXPECTED);
1118 return -1;
1121 int nEnd = AddInstruction(RE_GROUP_END);
1122 if (nEnd < 0)
1123 return -1;
1124 GetInstruction(nEnd).group.nGroup = GetInstruction(p).group.nGroup;
1125 return nPushGroup;
1128 // ParseGroup: parse grammar rule Group
1129 int ParseGroup(const RECHAR **ppszRE, bool &bEmpty)
1131 int nCall = AddInstruction(RE_CALL);
1132 if (nCall < 0)
1133 return -1;
1135 if (AddInstruction(RE_RETURN) < 0)
1136 return -1;
1138 int nAlt = ParseRE(ppszRE, bEmpty);
1139 if (nAlt < 0)
1141 if (GetLastParseError())
1142 return -1;
1144 if (!PeekToken(ppszRE, ')'))
1146 SetLastParseError(REPARSE_ERROR_PAREN_EXPECTED);
1147 return -1;
1150 // in the case of an empty group, we add a nop
1151 nAlt = AddInstruction(RE_NOP);
1152 if (nAlt < 0)
1153 return -1;
1156 GetInstruction(nCall).call.nTarget = nAlt;
1158 if (!MatchToken(ppszRE, ')'))
1160 SetLastParseError(REPARSE_ERROR_PAREN_EXPECTED);
1161 return -1;
1164 return nCall;
1167 RECHAR GetEscapedChar(RECHAR ch) throw()
1169 if (ch == 't')
1170 return '\t';
1171 return ch;
1174 // ParseCharItem: parse grammar rule CharItem
1175 int ParseCharItem(const RECHAR **ppszRE, RECHAR *pchStartChar, RECHAR *pchEndChar) throw()
1177 if (**ppszRE == '\\')
1179 *ppszRE = CharTraits::Next(*ppszRE);
1180 *pchStartChar = GetEscapedChar(**ppszRE);
1182 else
1183 *pchStartChar = **ppszRE;
1184 *ppszRE = CharTraits::Next(*ppszRE);
1186 if (!MatchToken(ppszRE, '-'))
1188 *pchEndChar = *pchStartChar;
1189 return 0;
1192 // check for unterminated range
1193 if (!**ppszRE || PeekToken(ppszRE, ']'))
1195 SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED);
1196 return -1;
1199 *pchEndChar = **ppszRE;
1200 *ppszRE = CharTraits::Next(*ppszRE);
1202 if (*pchEndChar < *pchStartChar)
1204 SetLastParseError(REPARSE_ERROR_INVALID_RANGE);
1205 return -1;
1207 return 0;
1210 int AddInstructions(int nNumInstructions)
1212 size_t nCurr = m_Instructions.GetCount();
1213 if (!m_Instructions.SetCount(nCurr+nNumInstructions))
1215 SetLastParseError(REPARSE_ERROR_OUTOFMEMORY);
1216 return -1;
1218 return (int) nCurr;
1221 // ParseCharSet: parse grammar rule CharSet
1222 int ParseCharSet(const RECHAR **ppszRE, BOOL bNot)
1224 int p = -1;
1226 unsigned char *pBits = NULL;
1228 if (CharTraits::UseBitFieldForRange())
1230 // we use a bit field to represent the characters
1231 // a 1 bit means match against the character
1232 // the last 5 bits are used as an index into
1233 // the byte array, and the first 3 bits
1234 // are used to index into the selected byte
1236 p = AddInstruction(bNot ? RE_NOTRANGE : RE_RANGE);
1237 if (p < 0)
1238 return -1;
1240 // add the required space to hold the character
1241 // set. We use one bit per character for ansi
1242 if (AddInstructions(InstructionsPerRangeBitField()) < 0)
1243 return -1;
1245 pBits = (unsigned char *) (&m_Instructions[p+1]);
1246 memset(pBits, 0x00, 256/8);
1248 else
1250 p = AddInstruction(bNot ? RE_NOTRANGE_EX : RE_RANGE_EX);
1251 if (p < 0)
1252 return -1;
1255 RECHAR chStart;
1256 RECHAR chEnd;
1258 while (**ppszRE && **ppszRE != ']')
1260 if (ParseCharItem(ppszRE, &chStart, &chEnd))
1261 return -1;
1263 if (CharTraits::UseBitFieldForRange())
1265 for (int i=chStart; i<=chEnd; i++)
1266 pBits[i >> 3] |= 1 << (i & 0x7);
1268 else
1270 int nStart = AddInstruction(RE_NOP);
1271 if (nStart < 0)
1272 return -1;
1274 int nEnd = AddInstruction(RE_NOP);
1275 if (nEnd < 0)
1276 return -1;
1278 GetInstruction(nStart).memory.nIndex = (int) chStart;
1279 GetInstruction(nEnd).memory.nIndex = (int) chEnd;
1283 if (!CharTraits::UseBitFieldForRange())
1284 GetInstruction(p).range.nTarget = m_Instructions.GetCount();
1286 return p;
1289 // ParseCharClass: parse grammar rule CharClass
1290 int ParseCharClass(const RECHAR **ppszRE, bool &bEmpty)
1292 bEmpty = false;
1293 if (MatchToken(ppszRE, ']'))
1295 SetLastParseError(REPARSE_ERROR_EMPTY_RANGE);
1296 return -1;
1299 BOOL bNot = FALSE;
1300 if (MatchToken(ppszRE, '^'))
1301 bNot = TRUE;
1303 if (MatchToken(ppszRE, ']'))
1305 SetLastParseError(REPARSE_ERROR_EMPTY_RANGE);
1306 return -1;
1309 int p = ParseCharSet(ppszRE, bNot);
1310 if (p < 0)
1311 return p;
1312 if (!MatchToken(ppszRE, ']'))
1314 SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED);
1315 return -1;
1318 return p;
1321 int AddMemInstruction(REInstructionType type)
1323 int p = AddInstruction(type);
1324 if (p < 0)
1325 return p;
1326 GetInstruction(p).memory.nIndex = m_uRequiredMem++;
1327 return p;
1330 // helper for parsing !SE
1331 int ParseNot(const RECHAR **ppszRE, bool &bEmpty)
1333 int nStoreCP = AddMemInstruction(RE_STORE_CHARPOS);
1334 int nStoreSP = AddMemInstruction(RE_STORE_STACKPOS);
1336 int nCall = AddInstruction(RE_CALL);
1337 if (nCall < 0)
1338 return -1;
1340 int nGetCP = AddInstruction(RE_GET_CHARPOS);
1341 if (nGetCP < 0)
1342 return -1;
1343 GetInstruction(nGetCP).memory.nIndex = GetInstruction(nStoreCP).memory.nIndex;
1345 int nGetSP = AddInstruction(RE_GET_STACKPOS);
1346 if (nGetSP < 0)
1347 return -1;
1348 GetInstruction(nGetSP).memory.nIndex = GetInstruction(nStoreSP).memory.nIndex;
1350 int nJmp = AddInstruction(RE_JMP);
1351 if (nJmp < 0)
1352 return -1;
1354 int nSE = ParseSE(ppszRE, bEmpty);
1355 if (nSE < 0)
1356 return nSE;
1358 // patch the call
1359 GetInstruction(nCall).call.nTarget = nSE;
1361 int nGetCP1 = AddInstruction(RE_GET_CHARPOS);
1362 if (nGetCP1 < 0)
1363 return -1;
1364 GetInstruction(nGetCP1).memory.nIndex = GetInstruction(nStoreCP).memory.nIndex;
1366 int nGetSP1 = AddInstruction(RE_GET_STACKPOS);
1367 if (nGetSP1 < 0)
1368 return -1;
1369 GetInstruction(nGetSP1).memory.nIndex = GetInstruction(nStoreSP).memory.nIndex;
1371 int nRet = AddInstruction(RE_RETURN);
1372 if (nRet < 0)
1373 return -1;
1375 GetInstruction(nJmp).jmp.nTarget = nRet+1;
1377 return nStoreCP;
1380 // ParseAbbrev: parse grammar rule Abbrev
1381 int ParseAbbrev(const RECHAR **ppszRE, bool &bEmpty)
1383 const RECHAR **szAbbrevs = CharTraits::GetAbbrevs();
1385 while (*szAbbrevs)
1387 if (**ppszRE == **szAbbrevs)
1389 const RECHAR *szAbbrev = (*szAbbrevs)+1;
1390 int p = ParseE(&szAbbrev, bEmpty);
1391 if (p < 0)
1393 SetLastParseError(REPARSE_ERROR_UNEXPECTED);
1394 return p;
1396 *ppszRE = CharTraits::Next(*ppszRE);
1397 return p;
1399 szAbbrevs++;
1401 return -1;
1404 // ParseSE: parse grammar rule SE (simple expression)
1405 int ParseSE(const RECHAR **ppszRE, bool &bEmpty)
1408 if (MatchToken(ppszRE, '{'))
1409 return ParseArg(ppszRE, bEmpty);
1410 if (MatchToken(ppszRE, '('))
1411 return ParseGroup(ppszRE, bEmpty);
1412 if (MatchToken(ppszRE, '['))
1413 return ParseCharClass(ppszRE, bEmpty);
1415 if (MatchToken(ppszRE, '\\'))
1417 if (!CharTraits::Isdigit(**ppszRE))
1419 // check for abbreviations
1420 int p;
1421 p = ParseAbbrev(ppszRE, bEmpty);
1422 if (p >= 0)
1423 return p;
1425 if (GetLastParseError())
1426 return -1;
1428 // escaped char
1429 p = AddInstruction(RE_SYMBOL);
1430 if (p < 0)
1431 return -1;
1432 GetInstruction(p).symbol.nSymbol = (int) **ppszRE;
1433 *ppszRE = CharTraits::Next(*ppszRE);
1434 return p;
1436 // previous match
1437 bEmpty = false;
1438 int nPrev = AddInstruction(RE_PREVIOUS);
1439 if (nPrev < 0)
1440 return -1;
1442 UINT uValue = (UINT) CharTraits::Strtol(*ppszRE, (RECHAR **) ppszRE, 10);
1443 if (uValue >= m_uNumGroups)
1445 SetLastParseError(REPARSE_ERROR_INVALID_GROUP);
1446 return -1;
1448 GetInstruction(nPrev).prev.nGroup = (size_t) uValue;
1449 return nPrev;
1452 if (MatchToken(ppszRE, '!'))
1453 return ParseNot(ppszRE, bEmpty);
1455 if (**ppszRE == '}' || **ppszRE == ']' || **ppszRE == ')')
1457 return -1;
1460 if (**ppszRE == '\0')
1462 return -1;
1465 int p;
1466 if (**ppszRE == '.')
1468 p = AddInstruction(RE_ANY);
1469 if (p < 0)
1470 return -1;
1471 bEmpty = false;
1473 else if (**ppszRE == '$' && (*ppszRE)[1] == '\0')
1475 p = AddInstruction(RE_SYMBOL);
1476 if (p < 0)
1477 return -1;
1478 GetInstruction(p).symbol.nSymbol = 0;
1479 bEmpty = false;
1481 else
1483 p = AddInstruction(RE_SYMBOL);
1484 if (p < 0)
1485 return -1;
1486 GetInstruction(p).symbol.nSymbol = (int) **ppszRE;
1487 bEmpty = false;
1489 *ppszRE = CharTraits::Next(*ppszRE);
1490 return p;
1493 // ParseE: parse grammar rule E (expression)
1494 int ParseE(const RECHAR **ppszRE, bool &bEmpty)
1496 CParseState ParseState(this);
1497 const RECHAR *sz = *ppszRE;
1499 int nSE;
1501 int nFirst = ParseSE(ppszRE, bEmpty);
1502 if (nFirst < 0)
1503 return nFirst;
1505 REInstructionType type = RE_MATCH;
1507 if (MatchToken(ppszRE, '*'))
1508 if(MatchToken(ppszRE, '?'))
1509 type = RE_NG_STAR_BEGIN;
1510 else
1511 type = RE_STAR_BEGIN;
1514 else if (MatchToken(ppszRE, '+'))
1515 if(MatchToken(ppszRE, '?'))
1516 type = RE_NG_PLUS;
1517 else
1518 type = RE_PLUS;
1520 else if (MatchToken(ppszRE, '?'))
1521 if(MatchToken(ppszRE, '?'))
1522 type = RE_NG_QUESTION;
1523 else
1524 type = RE_QUESTION;
1527 if (type == RE_MATCH)
1528 return nFirst;
1530 if (type == RE_STAR_BEGIN || type == RE_QUESTION|| type == RE_NG_STAR_BEGIN || type == RE_NG_QUESTION)
1532 ParseState.Restore(this);
1534 else
1536 m_uNumGroups = ParseState.m_uNumGroups;
1538 *ppszRE = sz;
1540 int nE;
1542 if (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS || type == RE_NG_QUESTION) // Non-Greedy
1544 int nCall = AddInstruction(RE_CALL);
1545 if (nCall < 0)
1546 return -1;
1548 bEmpty = false;
1550 nSE = ParseSE(ppszRE, bEmpty);
1551 if (nSE < 0)
1552 return nSE;
1554 if (bEmpty && (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS))
1556 SetLastParseError(REPARSE_ERROR_EMPTY_REPEATOP);
1557 return -1;
1559 bEmpty = true;
1561 *ppszRE = CharTraits::Next(*ppszRE);
1562 *ppszRE = CharTraits::Next(*ppszRE);
1564 if (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS)
1566 int nJmp = AddInstruction(RE_JMP);
1567 if (nJmp < 0)
1568 return -1;
1569 GetInstruction(nCall).call.nTarget = nJmp+1;
1570 GetInstruction(nJmp).jmp.nTarget = nCall;
1572 else
1573 GetInstruction(nCall).call.nTarget = nSE+1;
1575 if (type == RE_NG_PLUS)
1576 nE = nFirst;
1577 else
1578 nE = nCall;
1580 else // Greedy
1583 int nPushMem = AddInstruction(RE_PUSH_MEMORY);
1584 if (nPushMem < 0)
1585 return -1;
1587 int nStore = AddInstruction(RE_STORE_CHARPOS);
1588 if (nStore < 0)
1589 return -1;
1591 if (AddInstruction(RE_PUSH_CHARPOS) < 0)
1592 return -1;
1594 int nCall = AddInstruction(RE_CALL);
1595 if (nCall < 0)
1596 return -1;
1598 if (AddInstruction(RE_POP_CHARPOS) < 0)
1599 return -1;
1601 int nPopMem = AddInstruction(RE_POP_MEMORY);
1602 if (nPopMem < 0)
1603 return -1;
1605 int nJmp = AddInstruction(RE_JMP);
1606 if (nJmp < 0)
1607 return -1;
1609 GetInstruction(nPushMem).memory.nIndex = m_uRequiredMem++;
1610 GetInstruction(nStore).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
1611 GetInstruction(nCall).call.nTarget = nJmp+1;
1612 GetInstruction(nPopMem).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
1614 bEmpty = false;
1616 nSE = ParseSE(ppszRE, bEmpty);
1617 if (nSE < 0)
1618 return nSE;
1620 if (bEmpty && (type == RE_STAR_BEGIN || type == RE_PLUS))
1622 SetLastParseError(REPARSE_ERROR_EMPTY_REPEATOP);
1623 return -1;
1626 if (type != RE_PLUS && type != RE_NG_PLUS)
1627 bEmpty = true;
1629 *ppszRE = CharTraits::Next(*ppszRE);
1632 int nRetNoMatch = AddInstruction(RE_RET_NOMATCH);
1633 if (nRetNoMatch < 0)
1634 return -1;
1636 int nStore1 = AddInstruction(RE_STORE_CHARPOS);
1637 if (nStore1 < 0)
1638 return -1;
1640 GetInstruction(nRetNoMatch).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
1641 GetInstruction(nStore1).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
1643 if (type != RE_QUESTION)
1645 int nJmp1 = AddInstruction(RE_JMP);
1646 if (nJmp1 < 0)
1647 return -1;
1648 GetInstruction(nJmp1).jmp.nTarget = nPushMem;
1651 GetInstruction(nJmp).jmp.nTarget = m_Instructions.GetCount();
1652 if (type == RE_PLUS)
1653 nE = nFirst;
1654 else
1655 nE = nPushMem;
1658 return nE;
1662 // ParseAltE: parse grammar rule AltE
1663 int ParseAltE(const RECHAR **ppszRE, bool &bEmpty)
1665 const RECHAR *sz = *ppszRE;
1666 CParseState ParseState(this);
1668 int nPush = AddInstruction(RE_PUSH_CHARPOS);
1669 if (nPush < 0)
1670 return -1;
1672 int nCall = AddInstruction(RE_CALL);
1673 if (nCall < 0)
1674 return -1;
1676 GetInstruction(nCall).call.nTarget = nPush+4;
1677 if (AddInstruction(RE_POP_CHARPOS) < 0)
1678 return -1;
1680 int nJmpNext = AddInstruction(RE_JMP);
1681 if (nJmpNext < 0)
1682 return -1;
1684 int nE = ParseE(ppszRE, bEmpty);
1685 if (nE < 0)
1687 if (GetLastParseError())
1688 return -1;
1689 ParseState.Restore(this);
1690 return nE;
1693 int nJmpEnd = AddInstruction(RE_JMP);
1694 if (nJmpEnd < 0)
1695 return -1;
1697 GetInstruction(nJmpNext).jmp.nTarget = nJmpEnd+1;
1699 if (!MatchToken(ppszRE, '|'))
1701 ParseState.Restore(this);
1702 *ppszRE = sz;
1704 return ParseE(ppszRE, bEmpty);
1707 bool bEmptyAltE;
1708 int nAltE = ParseAltE(ppszRE, bEmptyAltE);
1709 GetInstruction(nJmpEnd).jmp.nTarget = m_Instructions.GetCount();
1710 GetInstruction(nJmpNext).jmp.nTarget = nAltE;
1711 if (nAltE < 0)
1713 if (GetLastParseError())
1714 return -1;
1715 ParseState.Restore(this);
1716 return nAltE;
1718 bEmpty = bEmpty | bEmptyAltE;
1719 return nPush;
1722 // ParseRE: parse grammar rule RE (regular expression)
1723 int ParseRE(const RECHAR **ppszRE, bool &bEmpty)
1725 if (**ppszRE == '\0')
1726 return -1;
1728 int p = ParseAltE(ppszRE, bEmpty);
1729 if (p < 0)
1730 return p;
1732 bool bEmptyRE = true;
1733 ParseRE(ppszRE, bEmptyRE);
1734 if (GetLastParseError())
1735 return -1;
1736 bEmpty = bEmpty && bEmptyRE;
1737 return p;
1740 //pointers to the matched string and matched groups, currently point into an internal allocated
1741 //buffer that hold a copy of the input string.
1742 //This function fix these pointers to point into the original, user supplied buffer (first param to Match method).
1743 //Example: If a ptr (szStart) currently point to <internal buffer>+3, it is fixed to <user supplied buffer>+3
1744 void FixupMatchContext(CAtlREMatchContext<CharTraits> *pContext, const RECHAR *szOrig, const RECHAR *szNew)
1746 ATLENSURE(pContext);
1747 ATLASSERT(szOrig);
1748 ATLASSERT(szNew);
1750 pContext->m_Match.szStart = szOrig + (pContext->m_Match.szStart - szNew);
1751 pContext->m_Match.szEnd = szOrig + (pContext->m_Match.szEnd - szNew);
1752 for (UINT i=0; i<pContext->m_uNumGroups; i++)
1754 if (pContext->m_Matches[i].szStart==NULL || pContext->m_Matches[i].szEnd==NULL)
1756 continue; //Do not fix unmatched groups.
1758 pContext->m_Matches[i].szStart = szOrig + (pContext->m_Matches[i].szStart - szNew);
1759 pContext->m_Matches[i].szEnd = szOrig + (pContext->m_Matches[i].szEnd - szNew);
1762 // implementation
1763 // helpers for dumping and debugging the rx engine
1764 public:
1765 #ifdef ATL_REGEXP_DUMP
1766 size_t DumpInstruction(size_t ip)
1768 printf("%08x ", ip);
1769 switch (GetInstruction(ip).type)
1771 case RE_NOP:
1772 printf("NOP\n");
1773 ip++;
1774 break;
1776 case RE_SYMBOL:
1777 AtlprintfT<RECHAR>(CAToREChar<RECHAR>("Symbol %c\n"),GetInstruction(ip).symbol.nSymbol);
1778 ip++;
1779 break;
1781 case RE_ANY:
1782 printf("Any\n");
1783 ip++;
1784 break;
1786 case RE_RANGE:
1787 printf("Range\n");
1788 ip++;
1789 ip += InstructionsPerRangeBitField();
1790 break;
1792 case RE_NOTRANGE:
1793 printf("NOT Range\n");
1794 ip++;
1795 ip += InstructionsPerRangeBitField();
1796 break;
1798 case RE_RANGE_EX:
1799 printf("RangeEx %08x\n", GetInstruction(ip).range.nTarget);
1800 ip++;
1801 break;
1803 case RE_NOTRANGE_EX:
1804 printf("NotRangeEx %08x\n", GetInstruction(ip).range.nTarget);
1805 ip++;
1806 break;
1808 case RE_GROUP_START:
1809 printf("Start group %d\n", GetInstruction(ip).group.nGroup);
1810 ip++;
1811 break;
1813 case RE_GROUP_END:
1814 printf("Group end %d\n", GetInstruction(ip).group.nGroup);
1815 ip++;
1816 break;
1818 case RE_PUSH_CHARPOS:
1819 printf("Push char pos\n");
1820 ip++;
1821 break;
1823 case RE_POP_CHARPOS:
1824 printf("Pop char pos\n");
1825 ip++;
1826 break;
1828 case RE_STORE_CHARPOS:
1829 printf("Store char pos %d\n", GetInstruction(ip).memory.nIndex);
1830 ip++;
1831 break;
1833 case RE_GET_CHARPOS:
1834 printf("Get char pos %d\n", GetInstruction(ip).memory.nIndex);
1835 ip++;
1836 break;
1838 case RE_STORE_STACKPOS:
1839 printf("Store stack pos %d\n", GetInstruction(ip).memory.nIndex);
1840 ip++;
1841 break;
1843 case RE_GET_STACKPOS:
1844 printf("Get stack pos %d\n", GetInstruction(ip).memory.nIndex);
1845 ip++;
1846 break;
1848 case RE_CALL:
1849 printf("Call %08x\n", GetInstruction(ip).call.nTarget);
1850 ip++;
1851 break;
1853 case RE_JMP:
1854 printf("Jump %08x\n", GetInstruction(ip).jmp.nTarget);
1855 ip++;
1856 break;
1858 case RE_RETURN:
1859 printf("return\n");
1860 ip++;
1861 break;
1863 case RE_PUSH_MEMORY:
1864 printf("Push memory %08x\n", GetInstruction(ip).memory.nIndex);
1865 ip++;
1866 break;
1868 case RE_POP_MEMORY:
1869 printf("Pop memory %08x\n", GetInstruction(ip).memory.nIndex);
1870 ip++;
1871 break;
1873 case RE_RET_NOMATCH:
1874 printf("Return no match %08x\n", GetInstruction(ip).memory.nIndex);
1875 ip++;
1876 break;
1878 case RE_MATCH:
1879 printf("END\n");
1880 ip++;
1881 break;
1883 case RE_ADVANCE:
1884 printf("ADVANCE\n");
1885 ip++;
1886 break;
1888 case RE_FAIL:
1889 printf("FAIL\n");
1890 ip++;
1891 break;
1893 case RE_PREVIOUS:
1894 printf("Prev %d\n", GetInstruction(ip).prev.nGroup);
1895 ip++;
1896 break;
1898 case RE_PUSH_GROUP:
1899 printf("Push group %d\n", GetInstruction(ip).group.nGroup);
1900 ip++;
1901 break;
1903 case RE_POP_GROUP:
1904 printf("Pop group %d\n", GetInstruction(ip).group.nGroup);
1905 ip++;
1906 break;
1909 default:
1910 printf("????\n");
1911 ip++;
1912 break;
1914 return ip;
1917 void Dump(size_t ipCurrent = 0)
1919 size_t ip = 0;
1921 while (ip < m_Instructions.GetCount())
1923 if (ip == ipCurrent)
1924 printf("->");
1925 ip = DumpInstruction(ip);
1928 #endif
1930 #ifdef ATLRX_DEBUG
1931 void cls( HANDLE hConsole )
1933 COORD coordScreen = { 0, 0 }; /* here's where we'll home the
1934 cursor */
1935 BOOL bSuccess;
1936 DWORD cCharsWritten;
1937 CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */
1938 DWORD dwConSize; /* number of character cells in
1939 the current buffer */
1941 /* get the number of character cells in the current buffer */
1943 bSuccess = GetConsoleScreenBufferInfo( hConsole, &csbi );
1944 dwConSize = csbi.dwSize.X * csbi.dwSize.Y;
1946 /* fill the entire screen with blanks */
1948 bSuccess = FillConsoleOutputCharacter( hConsole, (TCHAR) ' ',
1949 dwConSize, coordScreen, &cCharsWritten );
1951 /* get the current text attribute */
1953 bSuccess = GetConsoleScreenBufferInfo( hConsole, &csbi );
1955 /* now set the buffer's attributes accordingly */
1957 bSuccess = FillConsoleOutputAttribute( hConsole, csbi.wAttributes,
1958 dwConSize, coordScreen, &cCharsWritten );
1960 /* put the cursor at (0, 0) */
1962 bSuccess = SetConsoleCursorPosition( hConsole, coordScreen );
1963 return;
1966 void DumpStack(CAtlREMatchContext<CharTraits> *pContext)
1968 for (size_t i=pContext->m_nTos; i>0; i--)
1970 if (pContext->m_stack[i] < (void *) m_Instructions.GetCount())
1971 printf("0x%p\n", pContext->m_stack[i]);
1972 else
1974 // assume a pointer into the input
1975 AtlprintfT<RECHAR>(CAToREChar<RECHAR>("%s\n"), pContext->m_stack[i]);
1980 void DumpMemory(CAtlREMatchContext<CharTraits> *pContext)
1982 for (UINT i=0; i<m_uRequiredMem; i++)
1984 AtlprintfT<RECHAR>(CAToREChar<RECHAR>("%d: %s\n"), i, pContext->m_Mem.m_p[i]);
1988 virtual void OnDebugEvent(size_t ip, const RECHAR *szIn, const RECHAR *sz, CAtlREMatchContext<CharTraits> *pContext)
1990 cls(GetStdHandle(STD_OUTPUT_HANDLE));
1991 printf("----------Code---------\n");
1992 Dump(ip);
1993 printf("----------Input---------\n");
1994 AtlprintfT<RECHAR>(CAToREChar<RECHAR>("%s\n"), szIn);
1995 for (int s=0; szIn+s < sz; s++)
1997 printf(" ");
1999 printf("^\n");
2000 printf("----------Memory---------\n");
2001 DumpMemory(pContext);
2002 printf("----------Stack---------\n");
2003 DumpStack(pContext);
2004 getchar();
2006 #endif
2010 } // namespace ATL
2011 #pragma pack(pop)
2013 #endif // __ATLRX_H__