From 0403f34b78ef1f663f0703784e089e8396fb338c Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Sun, 5 May 2013 15:44:06 +0400 Subject: [PATCH] msxml3: Fix ignorable whitespace detection. --- dlls/msxml3/domdoc.c | 14 ++++++++-- dlls/msxml3/tests/domdoc.c | 64 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/dlls/msxml3/domdoc.c b/dlls/msxml3/domdoc.c index 67902bd7a1c..b6079973bb7 100644 --- a/dlls/msxml3/domdoc.c +++ b/dlls/msxml3/domdoc.c @@ -416,10 +416,20 @@ static void sax_characters(void *ctx, const xmlChar *ch, int len) if (ctxt->node) { - /* during domdoc_loadXML() the xmlDocPtr->_private data is not available */ + /* Characters are reported with multiple calls; for example, each charref is reported with a separate + call, and the parser then appends it to a single text node or creates a new node if none exists yet. + It's not possible to tell whether data is ignorable just by looking at the data itself, because it could be + space characters that separate charrefs, or a similar case. We only need to skip leading and trailing spaces, + or the whole node if it has nothing but space characters. To detect leading space, node->last is checked; it + contains the text node pointer if one was already created. Trailing spaces are detected by looking directly at the parser input + for the next '<' opening bracket - similar logic is used by libxml2 itself. + + Note that during domdoc_loadXML() the xmlDocPtr->_private data is not available. 
*/ if (!This->properties->preserving && !is_preserving_whitespace(ctxt->node) && - strn_isspace(ch, len)) + strn_isspace(ch, len) && + (!ctxt->node->last || + ((ctxt->node->last && (*ctxt->input->cur) == '<')))) return; } diff --git a/dlls/msxml3/tests/domdoc.c b/dlls/msxml3/tests/domdoc.c index d5f24138ed9..cc79d4bba46 100644 --- a/dlls/msxml3/tests/domdoc.c +++ b/dlls/msxml3/tests/domdoc.c @@ -435,6 +435,13 @@ static const char szExampleXML[] = " \n" "\n"; +static const char charrefsxml[] = +"" +"" +" Text A end " +"AB C " +""; + static const CHAR szNodeTypesXML[] = "" "" @@ -4154,10 +4161,58 @@ static inline void _check_ws_preserved(int line, IXMLDOMDocument2* doc, char con IXMLDOMNode_Release(node2); } -static void test_whitespace(void) +static void test_preserve_charref(IXMLDOMDocument2 *doc, VARIANT_BOOL preserve) { + static const WCHAR b1_p[] = {' ','T','e','x','t',' ','A',' ','e','n','d',' ',0}; + static const WCHAR b1_i[] = {'T','e','x','t',' ','A',' ','e','n','d',0}; + static const WCHAR b2_p[] = {'A','B',' ','C',' ',0}; + static const WCHAR b2_i[] = {'A','B',' ','C',0}; + IXMLDOMNodeList *list; + IXMLDOMElement *root; + IXMLDOMNode *node; + const WCHAR *text; VARIANT_BOOL b; + HRESULT hr; + BSTR s; + + hr = IXMLDOMDocument2_put_preserveWhiteSpace(doc, preserve); + ok(hr == S_OK, "got 0x%08x\n", hr); + + hr = IXMLDOMDocument2_loadXML(doc, _bstr_(charrefsxml), &b); + ok(hr == S_OK, "got 0x%08x\n", hr); + + hr = IXMLDOMDocument2_get_documentElement(doc, &root); + ok(hr == S_OK, "got 0x%08x\n", hr); + + hr = IXMLDOMElement_get_childNodes(root, &list); + ok(hr == S_OK, "got 0x%08x\n", hr); + IXMLDOMElement_Release(root); + + text = preserve == VARIANT_TRUE ? 
b1_p : b1_i; + hr = IXMLDOMNodeList_get_item(list, 0, &node); + ok(hr == S_OK, "got 0x%08x\n", hr); + hr = IXMLDOMNode_get_text(node, &s); + ok(hr == S_OK, "got 0x%08x\n", hr); + ok(!lstrcmpW(s, text), "0x%x, got %s\n", preserve, wine_dbgstr_w(s)); + SysFreeString(s); + IXMLDOMNode_Release(node); + + text = preserve == VARIANT_TRUE ? b2_p : b2_i; + hr = IXMLDOMNodeList_get_item(list, 1, &node); + ok(hr == S_OK, "got 0x%08x\n", hr); + hr = IXMLDOMNode_get_text(node, &s); + ok(hr == S_OK, "got 0x%08x\n", hr); + ok(!lstrcmpW(s, text), "0x%x, got %s\n", preserve, wine_dbgstr_w(s)); + SysFreeString(s); + IXMLDOMNode_Release(node); + + IXMLDOMNodeList_Release(list); +} + +static void test_whitespace(void) +{ IXMLDOMDocument2 *doc1, *doc2, *doc3, *doc4; + VARIANT_BOOL b; doc1 = create_document(&IID_IXMLDOMDocument2); doc2 = create_document(&IID_IXMLDOMDocument2); @@ -4224,10 +4279,15 @@ static void test_whitespace(void) check_ws_preserved(doc3, NULL); check_ws_ignored(doc4, NULL); - IXMLDOMDocument2_Release(doc1); IXMLDOMDocument2_Release(doc2); IXMLDOMDocument2_Release(doc3); IXMLDOMDocument2_Release(doc4); + + /* text with char references */ + test_preserve_charref(doc1, VARIANT_TRUE); + test_preserve_charref(doc1, VARIANT_FALSE); + IXMLDOMDocument2_Release(doc1); + free_bstrs(); } -- 2.11.4.GIT