From db69b121ac6e3306d95911c00b18fc1c7c4f5b67 Mon Sep 17 00:00:00 2001
From: Jacek Caban
Date: Mon, 15 Sep 2008 20:41:04 +0200
Subject: [PATCH] jscript: Added String.match implementation.

RegExp part of patch is based on Mozilla regexp implementation.
---
 dlls/jscript/jscript.h |   8 ++
 dlls/jscript/regexp.c  | 202 +++++++++++++++++++++++++++++++++++++++++++++++++
 dlls/jscript/string.c  |  84 +++++++++++++++++++-
 3 files changed, 292 insertions(+), 2 deletions(-)

diff --git a/dlls/jscript/jscript.h b/dlls/jscript/jscript.h
index c4e61d61b2a..a098b82c992 100644
--- a/dlls/jscript/jscript.h
+++ b/dlls/jscript/jscript.h
@@ -184,6 +184,14 @@ HRESULT create_object_constr(script_ctx_t*,DispatchEx**);
 HRESULT create_regexp_constr(script_ctx_t*,DispatchEx**);
 HRESULT create_string_constr(script_ctx_t*,DispatchEx**);
 
+/* A single match: str points into the searched string, len is its length in WCHARs. */
+typedef struct {
+    const WCHAR *str;
+    DWORD len;
+} match_result_t;
+
+HRESULT regexp_match(DispatchEx*,const WCHAR*,DWORD,BOOL,match_result_t**,DWORD*);
+
 static inline VARIANT *get_arg(DISPPARAMS *dp, DWORD i)
 {
     return dp->rgvarg + dp->cArgs-i-1;
diff --git a/dlls/jscript/regexp.c b/dlls/jscript/regexp.c
index f6fbec3ec69..e8e9bea3618 100644
--- a/dlls/jscript/regexp.c
+++ b/dlls/jscript/regexp.c
@@ -3116,6 +3116,99 @@ good:
     return x;
 }
 
+/*
+ * Try the regexp at x->cp and then at every later position up to and
+ * including gData->cpend. gData->skipped is set to the distance between
+ * the position being tried and the initial x->cp.
+ *
+ * Returns the state from ExecuteREBytecode for the first position that
+ * matches, or NULL if none does. The scan stops early, returning whatever
+ * ExecuteREBytecode returned, if gData->ok is cleared or the regexp has
+ * JSREG_STICKY set.
+ */
+static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
+{
+    REMatchState *result;
+    const WCHAR *cp = x->cp;
+    const WCHAR *cp2;
+    UINT j;
+
+    /*
+     * Have to include the position beyond the last character
+     * in order to detect end-of-input/line condition.
+     */
+    for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
+        gData->skipped = cp2 - cp;
+        x->cp = cp2;
+        for (j = 0; j < gData->regexp->parenCount; j++)
+            x->parens[j].index = -1;
+        result = ExecuteREBytecode(gData, x);
+        if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
+            return result;
+        gData->backTrackSP = gData->backTrackStack;
+        gData->cursz = 0;
+        gData->stateStackTop = 0;
+        cp2 = cp + gData->skipped;
+    }
+    return NULL;
+}
+
+#define MIN_BACKTRACK_LIMIT 400000
+
+/*
+ * Set up gData for running re: allocate the backtrack stack, the state
+ * stack and the REMatchState (with room for re->parenCount captures) from
+ * gData->pool, then run ProcessCharSet on every character class of re that
+ * is not marked converted.
+ *
+ * Returns the new match state, or NULL on failure. Allocation failures are
+ * reported through js_ReportOutOfScriptQuota and clear gData->ok. The
+ * length argument is currently unused.
+ */
+static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
+{
+    REMatchState *result;
+    UINT i;
+
+    gData->backTrackStackSize = INITIAL_BACKTRACK;
+    gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK);
+    if (!gData->backTrackStack)
+        goto bad;
+
+    gData->backTrackSP = gData->backTrackStack;
+    gData->cursz = 0;
+    gData->backTrackCount = 0;
+    gData->backTrackLimit = 0;
+
+    gData->stateStackLimit = INITIAL_STATESTACK;
+    gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
+    if (!gData->stateStack)
+        goto bad;
+
+    gData->stateStackTop = 0;
+    gData->cx = cx;
+    gData->regexp = re;
+    gData->ok = TRUE;
+
+    result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
+    if (!result)
+        goto bad;
+
+    for (i = 0; i < re->classCount; i++) {
+        if (!re->classList[i].converted &&
+            !ProcessCharSet(gData, &re->classList[i])) {
+            return NULL;
+        }
+    }
+
+    return result;
+
+bad:
+    js_ReportOutOfScriptQuota(cx);
+    gData->ok = FALSE;
+    return NULL;
+}
+
 static void
 js_DestroyRegExp(JSRegExp *re)
 {
@@ -3224,6 +3317,115 @@ out:
     return re;
 }
 
+/*
+ * Match the regexp object dispex against str (len WCHARs long). Matching
+ * stops after the first match unless gflag is set or the regexp has
+ * JSREG_GLOB set.
+ *
+ * On success *result_cnt receives the number of matches and *match_result an
+ * array allocated with heap_alloc (NULL when there is no match) that the
+ * caller has to release with heap_free. The entries point into str. Neither
+ * output is written on failure.
+ */
+HRESULT regexp_match(DispatchEx *dispex, const WCHAR *str, DWORD len, BOOL gflag, match_result_t **match_result,
+        DWORD *result_cnt)
+{
+    RegExpInstance *This = (RegExpInstance*)dispex;
+    match_result_t *ret = NULL, *new_ret;
+    const WCHAR *cp = str;
+    REGlobalData gData;
+    REMatchState *x, *result;
+    DWORD matchlen;
+    DWORD i=0, ret_size = 0;
+    jsheap_t *mark;
+    size_t length;
+    HRESULT hres = E_FAIL;
+
+    length = len;
+
+    mark = jsheap_mark(&This->dispex.ctx->tmp_heap);
+    gData.pool = &This->dispex.ctx->tmp_heap;
+
+    while(1) {
+        gData.cpbegin = cp;
+        gData.cpend = str + len;
+        gData.start = cp-str;
+        gData.skipped = 0;
+
+        x = InitMatch(NULL, &gData, This->jsregexp, length);
+        if(!x) {
+            WARN("InitMatch failed\n");
+            break;
+        }
+
+        x->cp = cp;
+        result = MatchRegExp(&gData, x);
+        if(!gData.ok) {
+            WARN("MatchRegExp failed\n");
+            break;
+        }
+
+        if(!result) {
+            hres = S_OK;
+            break;
+        }
+
+        matchlen = (result->cp-cp) - gData.skipped;
+
+        /* Grow only when the array is full. The old block stays in ret until
+         * the new one is known to be valid, so a failure cannot leak it. */
+        if(ret_size == i) {
+            DWORD new_size = ret ? ret_size << 1 : 4;
+
+            if(ret)
+                new_ret = heap_realloc(ret, new_size * sizeof(match_result_t));
+            else
+                new_ret = heap_alloc(new_size * sizeof(match_result_t));
+            if(!new_ret) {
+                hres = E_OUTOFMEMORY;
+                break;
+            }
+
+            ret = new_ret;
+            ret_size = new_size;
+        }
+
+        ret[i].str = result->cp-matchlen;
+        ret[i].len = matchlen;
+
+        length -= result->cp-cp;
+        cp = result->cp;
+        i++;
+
+        if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
+            hres = S_OK;
+            break;
+        }
+
+        /* An empty match would be found again at the same position forever;
+         * step over one character, or stop if the end has been reached. */
+        if(!matchlen) {
+            if(cp == str + len) {
+                hres = S_OK;
+                break;
+            }
+
+            cp++;
+            length--;
+        }
+    }
+
+    jsheap_clear(mark);
+    if(FAILED(hres)) {
+        heap_free(ret);
+        return hres;
+    }
+
+    *match_result = ret;
+    *result_cnt = i;
+    return S_OK;
+}
+
 static HRESULT RegExp_source(DispatchEx *dispex, LCID lcid, WORD flags,
         DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
 {
diff --git a/dlls/jscript/string.c b/dlls/jscript/string.c
index 033e906eebb..b310cabec32 100644
--- a/dlls/jscript/string.c
+++ b/dlls/jscript/string.c
@@ -189,8 +189,88 @@ static HRESULT String_link(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS
 static HRESULT String_match(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
 {
-    FIXME("\n");
-    return E_NOTIMPL;
+    /*
+     * Only a single regexp object argument is handled. retv is set to VT_NULL
+     * when nothing matches, otherwise to an array object of the matched strings.
+     */
+    StringInstance *This = (StringInstance*)dispex;
+    match_result_t *match_result;
+    DispatchEx *array;
+    VARIANT var, *arg_var;
+    DWORD match_cnt, i;
+    HRESULT hres = S_OK;
+
+    TRACE("\n");
+
+    if(dp->cArgs - dp->cNamedArgs != 1) {
+        FIXME("unsupported args\n");
+        return E_NOTIMPL;
+    }
+
+    arg_var = get_arg(dp, 0);
+    switch(V_VT(arg_var)) {
+    case VT_DISPATCH: {
+        DispatchEx *regexp;
+
+        regexp = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg_var));
+        if(regexp) {
+            if(regexp->builtin_info->class == JSCLASS_REGEXP) {
+                hres = regexp_match(regexp, This->str, This->length, FALSE, &match_result, &match_cnt);
+                jsdisp_release(regexp);
+                if(FAILED(hres))
+                    return hres;
+                break;
+            }
+            jsdisp_release(regexp);
+        }
+    }
+    /* fall through: the argument is not a regexp object */
+    default:
+        FIXME("implemented only for regexp args\n");
+        return E_NOTIMPL;
+    }
+
+    if(!match_cnt) {
+        TRACE("no match\n");
+
+        heap_free(match_result);
+        if(retv)
+            V_VT(retv) = VT_NULL;
+        return S_OK;
+    }
+
+    hres = create_array(dispex->ctx, match_cnt, &array);
+    if(FAILED(hres)) {
+        heap_free(match_result);
+        return hres;
+    }
+
+    V_VT(&var) = VT_BSTR;
+
+    for(i=0; i < match_cnt; i++) {
+        V_BSTR(&var) = SysAllocStringLen(match_result[i].str, match_result[i].len);
+        if(!V_BSTR(&var)) {
+            hres = E_OUTOFMEMORY;
+            break;
+        }
+
+        hres = jsdisp_propput_idx(array, i, lcid, &var, ei, NULL/*FIXME*/);
+        SysFreeString(V_BSTR(&var));
+        if(FAILED(hres))
+            break;
+    }
+
+    /* The list from regexp_match belongs to us; release it on every path. */
+    heap_free(match_result);
+
+    if(SUCCEEDED(hres) && retv) {
+        V_VT(retv) = VT_DISPATCH;
+        V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(array);
+    }else {
+        jsdisp_release(array);
+    }
+
+    return hres;
 }
 
 static HRESULT String_replace(DispatchEx *dispex, LCID lcid, WORD flags, DISPPARAMS *dp,
-- 
2.11.4.GIT