user32: Get rid of the unused parameter in the EmptyClipboard driver entry point.
[wine.git] / dlls / xmllite / reader.c
blob2aa9d9533add953629bacbc8e3c287e72fa82f22
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parser state, stored in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,   /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records which construct was being parsed when an input problem interrupted
   the reader; parsing is resumed from this information. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into xmlreader.resume[], the saved input offsets used to resume
   from a particular input position. */
typedef enum
{
    XmlReadResume_Name = 0, /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local,    /* local for QName */
    XmlReadResume_Body,     /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last      /* number of slots, used as the array size */
} XmlReaderResume;
/* Indices into xmlreader.strvalues[], the per-node string values. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last          /* number of slots, used as the array size */
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 typedef struct
165 char *data;
166 UINT cur;
167 unsigned int allocated;
168 unsigned int written;
169 } encoded_buffer;
171 typedef struct input_buffer input_buffer;
173 typedef struct
175 IXmlReaderInput IXmlReaderInput_iface;
176 LONG ref;
177 /* reference passed on IXmlReaderInput creation, is kept when input is created */
178 IUnknown *input;
179 IMalloc *imalloc;
180 xml_encoding encoding;
181 BOOL hint;
182 WCHAR *baseuri;
183 /* stream reference set after SetInput() call from reader,
184 stored as sequential stream, cause currently
185 optimizations possible with IStream aren't implemented */
186 ISequentialStream *stream;
187 input_buffer *buffer;
188 unsigned int pending : 1;
189 } xmlreaderinput;
191 static const struct IUnknownVtbl xmlreaderinputvtbl;
193 /* Structure to hold parsed string of specific length.
195 Reader stores node value as 'start' pointer, on request
196 a null-terminated version of it is allocated.
198 To init a strval variable use reader_init_strval(),
199 to set strval as a reader value use reader_set_strval().
201 typedef struct
203 WCHAR *str; /* allocated null-terminated string */
204 UINT len; /* length in WCHARs, altered after ReadValueChunk */
205 UINT start; /* input position where value starts */
206 } strval;
208 static WCHAR emptyW[] = {0};
209 static const strval strval_empty = { emptyW };
211 struct attribute
213 struct list entry;
214 strval localname;
215 strval value;
218 struct element
220 struct list entry;
221 strval qname;
222 strval localname;
225 typedef struct
227 IXmlReader IXmlReader_iface;
228 LONG ref;
229 xmlreaderinput *input;
230 IMalloc *imalloc;
231 XmlReadState state;
232 XmlReaderInternalState instate;
233 XmlReaderResumeState resumestate;
234 XmlNodeType nodetype;
235 DtdProcessing dtdmode;
236 UINT line, pos; /* reader position in XML stream */
237 struct list attrs; /* attributes list for current node */
238 struct attribute *attr; /* current attribute */
239 UINT attr_count;
240 struct list elements;
241 strval strvalues[StringValue_Last];
242 UINT depth;
243 UINT max_depth;
244 BOOL empty_element;
245 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
246 } xmlreader;
248 struct input_buffer
250 encoded_buffer utf16;
251 encoded_buffer encoded;
252 UINT code_page;
253 xmlreaderinput *input;
256 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
258 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
261 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
263 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
266 /* reader memory allocation functions */
267 static inline void *reader_alloc(xmlreader *reader, size_t len)
269 return m_alloc(reader->imalloc, len);
272 static inline void reader_free(xmlreader *reader, void *mem)
274 m_free(reader->imalloc, mem);
277 /* Just return pointer from offset, no attempt to read more. */
278 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
280 encoded_buffer *buffer = &reader->input->buffer->utf16;
281 return (WCHAR*)buffer->data + offset;
284 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
286 return v->str ? v->str : reader_get_ptr2(reader, v->start);
289 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
291 *dest = *src;
293 if (src->str != strval_empty.str)
295 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
296 if (!dest->str) return E_OUTOFMEMORY;
297 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
298 dest->str[dest->len] = 0;
299 dest->start = 0;
302 return S_OK;
305 /* reader input memory allocation functions */
306 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
308 return m_alloc(input->imalloc, len);
311 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
313 return m_realloc(input->imalloc, mem, len);
316 static inline void readerinput_free(xmlreaderinput *input, void *mem)
318 m_free(input->imalloc, mem);
321 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
323 LPWSTR ret = NULL;
325 if(str) {
326 DWORD size;
328 size = (strlenW(str)+1)*sizeof(WCHAR);
329 ret = readerinput_alloc(input, size);
330 if (ret) memcpy(ret, str, size);
333 return ret;
336 static void reader_clear_attrs(xmlreader *reader)
338 struct attribute *attr, *attr2;
339 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
341 reader_free(reader, attr);
343 list_init(&reader->attrs);
344 reader->attr_count = 0;
345 reader->attr = NULL;
348 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
349 while we are on a node with attributes */
350 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
352 struct attribute *attr;
354 attr = reader_alloc(reader, sizeof(*attr));
355 if (!attr) return E_OUTOFMEMORY;
357 attr->localname = *localname;
358 attr->value = *value;
359 list_add_tail(&reader->attrs, &attr->entry);
360 reader->attr_count++;
362 return S_OK;
365 /* This one frees stored string value if needed */
366 static void reader_free_strvalued(xmlreader *reader, strval *v)
368 if (v->str != strval_empty.str)
370 reader_free(reader, v->str);
371 *v = strval_empty;
375 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
377 v->start = start;
378 v->len = len;
379 v->str = NULL;
382 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
384 return debugstr_wn(reader_get_strptr(reader, v), v->len);
387 /* used to initialize from constant string */
388 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
390 v->start = 0;
391 v->len = len;
392 v->str = str;
395 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
397 reader_free_strvalued(reader, &reader->strvalues[type]);
400 static void reader_free_strvalues(xmlreader *reader)
402 int type;
403 for (type = 0; type < StringValue_Last; type++)
404 reader_free_strvalue(reader, type);
407 /* This helper should only be used to test if strings are the same,
408 it doesn't try to sort. */
409 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
411 if (str1->len != str2->len) return 0;
412 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
415 static void reader_clear_elements(xmlreader *reader)
417 struct element *elem, *elem2;
418 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
420 reader_free_strvalued(reader, &elem->qname);
421 reader_free(reader, elem);
423 list_init(&reader->elements);
424 reader->empty_element = FALSE;
427 static HRESULT reader_inc_depth(xmlreader *reader)
429 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
430 return S_OK;
433 static void reader_dec_depth(xmlreader *reader)
435 if (reader->depth > 1) reader->depth--;
438 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
440 struct element *elem;
441 HRESULT hr;
443 elem = reader_alloc(reader, sizeof(*elem));
444 if (!elem) return E_OUTOFMEMORY;
446 hr = reader_strvaldup(reader, qname, &elem->qname);
447 if (FAILED(hr)) {
448 reader_free(reader, elem);
449 return hr;
452 hr = reader_strvaldup(reader, localname, &elem->localname);
453 if (FAILED(hr))
455 reader_free_strvalued(reader, &elem->qname);
456 reader_free(reader, elem);
457 return hr;
460 if (!list_empty(&reader->elements))
462 hr = reader_inc_depth(reader);
463 if (FAILED(hr)) {
464 reader_free(reader, elem);
465 return hr;
469 list_add_head(&reader->elements, &elem->entry);
470 reader->empty_element = FALSE;
471 return hr;
474 static void reader_pop_element(xmlreader *reader)
476 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
478 if (elem)
480 list_remove(&elem->entry);
481 reader_free_strvalued(reader, &elem->qname);
482 reader_free_strvalued(reader, &elem->localname);
483 reader_free(reader, elem);
484 reader_dec_depth(reader);
488 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
489 means node value is to be determined. */
490 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
492 strval *v = &reader->strvalues[type];
494 reader_free_strvalue(reader, type);
495 if (!value)
497 v->str = NULL;
498 v->start = 0;
499 v->len = 0;
500 return;
503 if (value->str == strval_empty.str)
504 *v = *value;
505 else
507 if (type == StringValue_Value)
509 /* defer allocation for value string */
510 v->str = NULL;
511 v->start = value->start;
512 v->len = value->len;
514 else
516 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
517 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
518 v->str[value->len] = 0;
519 v->len = value->len;
524 static inline int is_reader_pending(xmlreader *reader)
526 return reader->input->pending;
529 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
531 const int initial_len = 0x2000;
532 buffer->data = readerinput_alloc(input, initial_len);
533 if (!buffer->data) return E_OUTOFMEMORY;
535 memset(buffer->data, 0, 4);
536 buffer->cur = 0;
537 buffer->allocated = initial_len;
538 buffer->written = 0;
540 return S_OK;
543 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
545 readerinput_free(input, buffer->data);
548 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
550 if (encoding == XmlEncoding_Unknown)
552 FIXME("unsupported encoding %d\n", encoding);
553 return E_NOTIMPL;
556 *cp = xml_encoding_map[encoding].cp;
558 return S_OK;
561 xml_encoding parse_encoding_name(const WCHAR *name, int len)
563 int min, max, n, c;
565 if (!name) return XmlEncoding_Unknown;
567 min = 0;
568 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
570 while (min <= max)
572 n = (min+max)/2;
574 if (len != -1)
575 c = strncmpiW(xml_encoding_map[n].name, name, len);
576 else
577 c = strcmpiW(xml_encoding_map[n].name, name);
578 if (!c)
579 return xml_encoding_map[n].enc;
581 if (c > 0)
582 max = n-1;
583 else
584 min = n+1;
587 return XmlEncoding_Unknown;
590 static HRESULT alloc_input_buffer(xmlreaderinput *input)
592 input_buffer *buffer;
593 HRESULT hr;
595 input->buffer = NULL;
597 buffer = readerinput_alloc(input, sizeof(*buffer));
598 if (!buffer) return E_OUTOFMEMORY;
600 buffer->input = input;
601 buffer->code_page = ~0; /* code page is unknown at this point */
602 hr = init_encoded_buffer(input, &buffer->utf16);
603 if (hr != S_OK) {
604 readerinput_free(input, buffer);
605 return hr;
608 hr = init_encoded_buffer(input, &buffer->encoded);
609 if (hr != S_OK) {
610 free_encoded_buffer(input, &buffer->utf16);
611 readerinput_free(input, buffer);
612 return hr;
615 input->buffer = buffer;
616 return S_OK;
619 static void free_input_buffer(input_buffer *buffer)
621 free_encoded_buffer(buffer->input, &buffer->encoded);
622 free_encoded_buffer(buffer->input, &buffer->utf16);
623 readerinput_free(buffer->input, buffer);
626 static void readerinput_release_stream(xmlreaderinput *readerinput)
628 if (readerinput->stream) {
629 ISequentialStream_Release(readerinput->stream);
630 readerinput->stream = NULL;
634 /* Queries already stored interface for IStream/ISequentialStream.
635 Interface supplied on creation will be overwritten */
636 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
638 HRESULT hr;
640 readerinput_release_stream(readerinput);
641 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
642 if (hr != S_OK)
643 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
645 return hr;
648 /* reads a chunk to raw buffer */
649 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
651 encoded_buffer *buffer = &readerinput->buffer->encoded;
652 /* to make sure aligned length won't exceed allocated length */
653 ULONG len = buffer->allocated - buffer->written - 4;
654 ULONG read;
655 HRESULT hr;
657 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
658 variable width encodings like UTF-8 */
659 len = (len + 3) & ~3;
660 /* try to use allocated space or grow */
661 if (buffer->allocated - buffer->written < len)
663 buffer->allocated *= 2;
664 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
665 len = buffer->allocated - buffer->written;
668 read = 0;
669 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
670 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
671 readerinput->pending = hr == E_PENDING;
672 if (FAILED(hr)) return hr;
673 buffer->written += read;
675 return hr;
678 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
679 static void readerinput_grow(xmlreaderinput *readerinput, int length)
681 encoded_buffer *buffer = &readerinput->buffer->utf16;
683 length *= sizeof(WCHAR);
684 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
685 if (buffer->allocated < buffer->written + length + 4)
687 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
688 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
689 buffer->allocated = grown_size;
693 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
695 static const char startA[] = {'<','?'};
696 static const char commentA[] = {'<','!'};
697 encoded_buffer *buffer = &readerinput->buffer->encoded;
698 unsigned char *ptr = (unsigned char*)buffer->data;
700 return !memcmp(buffer->data, startA, sizeof(startA)) ||
701 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
702 /* test start byte */
703 (ptr[0] == '<' &&
705 (ptr[1] && (ptr[1] <= 0x7f)) ||
706 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
707 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
708 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
712 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
714 encoded_buffer *buffer = &readerinput->buffer->encoded;
715 static const WCHAR startW[] = {'<','?'};
716 static const WCHAR commentW[] = {'<','!'};
717 static const char utf8bom[] = {0xef,0xbb,0xbf};
718 static const char utf16lebom[] = {0xff,0xfe};
720 *enc = XmlEncoding_Unknown;
722 if (buffer->written <= 3)
724 HRESULT hr = readerinput_growraw(readerinput);
725 if (FAILED(hr)) return hr;
726 if (buffer->written <= 3) return MX_E_INPUTEND;
729 /* try start symbols if we have enough data to do that, input buffer should contain
730 first chunk already */
731 if (readerinput_is_utf8(readerinput))
732 *enc = XmlEncoding_UTF8;
733 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
734 !memcmp(buffer->data, commentW, sizeof(commentW)))
735 *enc = XmlEncoding_UTF16;
736 /* try with BOM now */
737 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
739 buffer->cur += sizeof(utf8bom);
740 *enc = XmlEncoding_UTF8;
742 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
744 buffer->cur += sizeof(utf16lebom);
745 *enc = XmlEncoding_UTF16;
748 return S_OK;
751 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
753 encoded_buffer *buffer = &readerinput->buffer->encoded;
754 int len = buffer->written;
756 /* complete single byte char */
757 if (!(buffer->data[len-1] & 0x80)) return len;
759 /* find start byte of multibyte char */
760 while (--len && !(buffer->data[len] & 0xc0))
763 return len;
766 /* Returns byte length of complete char sequence for buffer code page,
767 it's relative to current buffer position which is currently used for BOM handling
768 only. */
769 static int readerinput_get_convlen(xmlreaderinput *readerinput)
771 encoded_buffer *buffer = &readerinput->buffer->encoded;
772 int len;
774 if (readerinput->buffer->code_page == CP_UTF8)
775 len = readerinput_get_utf8_convlen(readerinput);
776 else
777 len = buffer->written;
779 TRACE("%d\n", len - buffer->cur);
780 return len - buffer->cur;
783 /* It's possible that raw buffer has some leftovers from last conversion - some char
784 sequence that doesn't represent a full code point. Length argument should be calculated with
785 readerinput_get_convlen(), if it's -1 it will be calculated here. */
786 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
788 encoded_buffer *buffer = &readerinput->buffer->encoded;
790 if (len == -1)
791 len = readerinput_get_convlen(readerinput);
793 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
794 /* everything below cur is lost too */
795 buffer->written -= len + buffer->cur;
796 /* after this point we don't need cur offset really,
797 it's used only to mark where actual data begins when first chunk is read */
798 buffer->cur = 0;
801 /* note that raw buffer content is kept */
802 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
804 encoded_buffer *src = &readerinput->buffer->encoded;
805 encoded_buffer *dest = &readerinput->buffer->utf16;
806 int len, dest_len;
807 HRESULT hr;
808 WCHAR *ptr;
809 UINT cp;
811 hr = get_code_page(enc, &cp);
812 if (FAILED(hr)) return;
814 readerinput->buffer->code_page = cp;
815 len = readerinput_get_convlen(readerinput);
817 TRACE("switching to cp %d\n", cp);
819 /* just copy in this case */
820 if (enc == XmlEncoding_UTF16)
822 readerinput_grow(readerinput, len);
823 memcpy(dest->data, src->data + src->cur, len);
824 dest->written += len*sizeof(WCHAR);
825 return;
828 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
829 readerinput_grow(readerinput, dest_len);
830 ptr = (WCHAR*)dest->data;
831 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
832 ptr[dest_len] = 0;
833 dest->written += dest_len*sizeof(WCHAR);
836 /* shrinks parsed data a buffer begins with */
837 static void reader_shrink(xmlreader *reader)
839 encoded_buffer *buffer = &reader->input->buffer->utf16;
841 /* avoid to move too often using threshold shrink length */
842 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
844 buffer->written -= buffer->cur*sizeof(WCHAR);
845 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
846 buffer->cur = 0;
847 *(WCHAR*)&buffer->data[buffer->written] = 0;
851 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
852 It won't attempt to shrink but will grow destination buffer if needed */
853 static HRESULT reader_more(xmlreader *reader)
855 xmlreaderinput *readerinput = reader->input;
856 encoded_buffer *src = &readerinput->buffer->encoded;
857 encoded_buffer *dest = &readerinput->buffer->utf16;
858 UINT cp = readerinput->buffer->code_page;
859 int len, dest_len;
860 HRESULT hr;
861 WCHAR *ptr;
863 /* get some raw data from stream first */
864 hr = readerinput_growraw(readerinput);
865 len = readerinput_get_convlen(readerinput);
867 /* just copy for UTF-16 case */
868 if (cp == ~0)
870 readerinput_grow(readerinput, len);
871 memcpy(dest->data + dest->written, src->data + src->cur, len);
872 dest->written += len*sizeof(WCHAR);
873 return hr;
876 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
877 readerinput_grow(readerinput, dest_len);
878 ptr = (WCHAR*)(dest->data + dest->written);
879 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
880 ptr[dest_len] = 0;
881 dest->written += dest_len*sizeof(WCHAR);
882 /* get rid of processed data */
883 readerinput_shrinkraw(readerinput, len);
885 return hr;
888 static inline UINT reader_get_cur(xmlreader *reader)
890 return reader->input->buffer->utf16.cur;
893 static inline WCHAR *reader_get_ptr(xmlreader *reader)
895 encoded_buffer *buffer = &reader->input->buffer->utf16;
896 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
897 if (!*ptr) reader_more(reader);
898 return (WCHAR*)buffer->data + buffer->cur;
901 static int reader_cmp(xmlreader *reader, const WCHAR *str)
903 int i=0;
904 const WCHAR *ptr = reader_get_ptr(reader);
905 while (str[i])
907 if (!ptr[i])
909 reader_more(reader);
910 ptr = reader_get_ptr(reader);
912 if (str[i] != ptr[i])
913 return ptr[i] - str[i];
914 i++;
916 return 0;
919 /* moves cursor n WCHARs forward */
920 static void reader_skipn(xmlreader *reader, int n)
922 encoded_buffer *buffer = &reader->input->buffer->utf16;
923 const WCHAR *ptr = reader_get_ptr(reader);
925 while (*ptr++ && n--)
927 buffer->cur++;
928 reader->pos++;
932 static inline BOOL is_wchar_space(WCHAR ch)
934 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
937 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
938 static int reader_skipspaces(xmlreader *reader)
940 encoded_buffer *buffer = &reader->input->buffer->utf16;
941 const WCHAR *ptr = reader_get_ptr(reader);
942 UINT start = reader_get_cur(reader);
944 while (is_wchar_space(*ptr))
946 if (*ptr == '\r')
947 reader->pos = 0;
948 else if (*ptr == '\n')
950 reader->line++;
951 reader->pos = 0;
953 else
954 reader->pos++;
956 buffer->cur++;
957 ptr = reader_get_ptr(reader);
960 return reader_get_cur(reader) - start;
963 /* [26] VersionNum ::= '1.' [0-9]+ */
964 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
966 static const WCHAR onedotW[] = {'1','.',0};
967 WCHAR *ptr, *ptr2;
968 UINT start;
970 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
972 start = reader_get_cur(reader);
973 /* skip "1." */
974 reader_skipn(reader, 2);
976 ptr2 = ptr = reader_get_ptr(reader);
977 while (*ptr >= '0' && *ptr <= '9')
979 reader_skipn(reader, 1);
980 ptr = reader_get_ptr(reader);
983 if (ptr2 == ptr) return WC_E_DIGIT;
984 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
985 TRACE("version=%s\n", debug_strval(reader, val));
986 return S_OK;
989 /* [25] Eq ::= S? '=' S? */
990 static HRESULT reader_parse_eq(xmlreader *reader)
992 static const WCHAR eqW[] = {'=',0};
993 reader_skipspaces(reader);
994 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
995 /* skip '=' */
996 reader_skipn(reader, 1);
997 reader_skipspaces(reader);
998 return S_OK;
1001 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1002 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1004 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1005 strval val, name;
1006 HRESULT hr;
1008 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1010 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1011 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1012 /* skip 'version' */
1013 reader_skipn(reader, 7);
1015 hr = reader_parse_eq(reader);
1016 if (FAILED(hr)) return hr;
1018 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1019 return WC_E_QUOTE;
1020 /* skip "'"|'"' */
1021 reader_skipn(reader, 1);
1023 hr = reader_parse_versionnum(reader, &val);
1024 if (FAILED(hr)) return hr;
1026 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1027 return WC_E_QUOTE;
1029 /* skip "'"|'"' */
1030 reader_skipn(reader, 1);
1032 return reader_add_attr(reader, &name, &val);
1035 /* ([A-Za-z0-9._] | '-') */
1036 static inline BOOL is_wchar_encname(WCHAR ch)
1038 return ((ch >= 'A' && ch <= 'Z') ||
1039 (ch >= 'a' && ch <= 'z') ||
1040 (ch >= '0' && ch <= '9') ||
1041 (ch == '.') || (ch == '_') ||
1042 (ch == '-'));
1045 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1046 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1048 WCHAR *start = reader_get_ptr(reader), *ptr;
1049 xml_encoding enc;
1050 int len;
1052 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1053 return WC_E_ENCNAME;
1055 val->start = reader_get_cur(reader);
1057 ptr = start;
1058 while (is_wchar_encname(*++ptr))
1061 len = ptr - start;
1062 enc = parse_encoding_name(start, len);
1063 TRACE("encoding name %s\n", debugstr_wn(start, len));
1064 val->str = start;
1065 val->len = len;
1067 if (enc == XmlEncoding_Unknown)
1068 return WC_E_ENCNAME;
1070 /* skip encoding name */
1071 reader_skipn(reader, len);
1072 return S_OK;
1075 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1076 static HRESULT reader_parse_encdecl(xmlreader *reader)
1078 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1079 strval name, val;
1080 HRESULT hr;
1082 if (!reader_skipspaces(reader)) return S_FALSE;
1084 if (reader_cmp(reader, encodingW)) return S_FALSE;
1085 name.str = reader_get_ptr(reader);
1086 name.start = reader_get_cur(reader);
1087 name.len = 8;
1088 /* skip 'encoding' */
1089 reader_skipn(reader, 8);
1091 hr = reader_parse_eq(reader);
1092 if (FAILED(hr)) return hr;
1094 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1095 return WC_E_QUOTE;
1096 /* skip "'"|'"' */
1097 reader_skipn(reader, 1);
1099 hr = reader_parse_encname(reader, &val);
1100 if (FAILED(hr)) return hr;
1102 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1103 return WC_E_QUOTE;
1105 /* skip "'"|'"' */
1106 reader_skipn(reader, 1);
1108 return reader_add_attr(reader, &name, &val);
1111 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1112 static HRESULT reader_parse_sddecl(xmlreader *reader)
1114 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1115 static const WCHAR yesW[] = {'y','e','s',0};
1116 static const WCHAR noW[] = {'n','o',0};
1117 strval name, val;
1118 UINT start;
1119 HRESULT hr;
1121 if (!reader_skipspaces(reader)) return S_FALSE;
1123 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1124 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1125 /* skip 'standalone' */
1126 reader_skipn(reader, 10);
1128 hr = reader_parse_eq(reader);
1129 if (FAILED(hr)) return hr;
1131 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1132 return WC_E_QUOTE;
1133 /* skip "'"|'"' */
1134 reader_skipn(reader, 1);
1136 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1137 return WC_E_XMLDECL;
1139 start = reader_get_cur(reader);
1140 /* skip 'yes'|'no' */
1141 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1142 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1143 TRACE("standalone=%s\n", debug_strval(reader, &val));
1145 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1146 return WC_E_QUOTE;
1147 /* skip "'"|'"' */
1148 reader_skipn(reader, 1);
1150 return reader_add_attr(reader, &name, &val);
1153 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1154 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1156 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1157 static const WCHAR declcloseW[] = {'?','>',0};
1158 HRESULT hr;
1160 /* check if we have "<?xml " */
1161 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1163 reader_skipn(reader, 5);
1164 hr = reader_parse_versioninfo(reader);
1165 if (FAILED(hr))
1166 return hr;
1168 hr = reader_parse_encdecl(reader);
1169 if (FAILED(hr))
1170 return hr;
1172 hr = reader_parse_sddecl(reader);
1173 if (FAILED(hr))
1174 return hr;
1176 reader_skipspaces(reader);
1177 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1178 reader_skipn(reader, 2);
1180 reader_inc_depth(reader);
1181 reader->nodetype = XmlNodeType_XmlDeclaration;
1182 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1183 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1184 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1186 return S_OK;
1189 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1190 static HRESULT reader_parse_comment(xmlreader *reader)
1192 WCHAR *ptr;
1193 UINT start;
1195 if (reader->resumestate == XmlReadResumeState_Comment)
1197 start = reader->resume[XmlReadResume_Body];
1198 ptr = reader_get_ptr(reader);
1200 else
1202 /* skip '<!--' */
1203 reader_skipn(reader, 4);
1204 reader_shrink(reader);
1205 ptr = reader_get_ptr(reader);
1206 start = reader_get_cur(reader);
1207 reader->nodetype = XmlNodeType_Comment;
1208 reader->resume[XmlReadResume_Body] = start;
1209 reader->resumestate = XmlReadResumeState_Comment;
1210 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1211 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1212 reader_set_strvalue(reader, StringValue_Value, NULL);
1215 /* will exit when there's no more data, it won't attempt to
1216 read more from stream */
1217 while (*ptr)
1219 if (ptr[0] == '-')
1221 if (ptr[1] == '-')
1223 if (ptr[2] == '>')
1225 strval value;
1227 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1228 TRACE("%s\n", debug_strval(reader, &value));
1230 /* skip rest of markup '->' */
1231 reader_skipn(reader, 3);
1233 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1234 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1235 reader_set_strvalue(reader, StringValue_Value, &value);
1236 reader->resume[XmlReadResume_Body] = 0;
1237 reader->resumestate = XmlReadResumeState_Initial;
1238 return S_OK;
1240 else
1241 return WC_E_COMMENT;
1245 reader_skipn(reader, 1);
1246 ptr++;
1249 return S_OK;
1252 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1253 static inline BOOL is_char(WCHAR ch)
1255 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1256 (ch >= 0x20 && ch <= 0xd7ff) ||
1257 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1258 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1259 (ch >= 0xe000 && ch <= 0xfffd);
1262 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1263 static inline BOOL is_pubchar(WCHAR ch)
1265 return (ch == ' ') ||
1266 (ch >= 'a' && ch <= 'z') ||
1267 (ch >= 'A' && ch <= 'Z') ||
1268 (ch >= '0' && ch <= '9') ||
1269 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1270 (ch == '=') || (ch == '?') ||
1271 (ch == '@') || (ch == '!') ||
1272 (ch >= '#' && ch <= '%') || /* #$% */
1273 (ch == '_') || (ch == '\r') || (ch == '\n');
1276 static inline BOOL is_namestartchar(WCHAR ch)
1278 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1279 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1280 (ch >= 0xc0 && ch <= 0xd6) ||
1281 (ch >= 0xd8 && ch <= 0xf6) ||
1282 (ch >= 0xf8 && ch <= 0x2ff) ||
1283 (ch >= 0x370 && ch <= 0x37d) ||
1284 (ch >= 0x37f && ch <= 0x1fff) ||
1285 (ch >= 0x200c && ch <= 0x200d) ||
1286 (ch >= 0x2070 && ch <= 0x218f) ||
1287 (ch >= 0x2c00 && ch <= 0x2fef) ||
1288 (ch >= 0x3001 && ch <= 0xd7ff) ||
1289 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1290 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1291 (ch >= 0xf900 && ch <= 0xfdcf) ||
1292 (ch >= 0xfdf0 && ch <= 0xfffd);
1295 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1296 static inline BOOL is_ncnamechar(WCHAR ch)
1298 return (ch >= 'A' && ch <= 'Z') ||
1299 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1300 (ch == '-') || (ch == '.') ||
1301 (ch >= '0' && ch <= '9') ||
1302 (ch == 0xb7) ||
1303 (ch >= 0xc0 && ch <= 0xd6) ||
1304 (ch >= 0xd8 && ch <= 0xf6) ||
1305 (ch >= 0xf8 && ch <= 0x2ff) ||
1306 (ch >= 0x300 && ch <= 0x36f) ||
1307 (ch >= 0x370 && ch <= 0x37d) ||
1308 (ch >= 0x37f && ch <= 0x1fff) ||
1309 (ch >= 0x200c && ch <= 0x200d) ||
1310 (ch >= 0x203f && ch <= 0x2040) ||
1311 (ch >= 0x2070 && ch <= 0x218f) ||
1312 (ch >= 0x2c00 && ch <= 0x2fef) ||
1313 (ch >= 0x3001 && ch <= 0xd7ff) ||
1314 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1315 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1316 (ch >= 0xf900 && ch <= 0xfdcf) ||
1317 (ch >= 0xfdf0 && ch <= 0xfffd);
1320 static inline BOOL is_namechar(WCHAR ch)
1322 return (ch == ':') || is_ncnamechar(ch);
1325 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1327 /* When we're on attribute always return attribute type, container node type is kept.
1328 Note that container is not necessarily an element, and attribute doesn't mean it's
1329 an attribute in XML spec terms. */
1330 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1333 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1334 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1335 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1336 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1337 [5] Name ::= NameStartChar (NameChar)* */
1338 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1340 WCHAR *ptr;
1341 UINT start;
1343 if (reader->resume[XmlReadResume_Name])
1345 start = reader->resume[XmlReadResume_Name];
1346 ptr = reader_get_ptr(reader);
1348 else
1350 ptr = reader_get_ptr(reader);
1351 start = reader_get_cur(reader);
1352 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1355 while (is_namechar(*ptr))
1357 reader_skipn(reader, 1);
1358 ptr = reader_get_ptr(reader);
1361 if (is_reader_pending(reader))
1363 reader->resume[XmlReadResume_Name] = start;
1364 return E_PENDING;
1366 else
1367 reader->resume[XmlReadResume_Name] = 0;
1369 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1370 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1372 return S_OK;
1375 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1376 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1378 static const WCHAR xmlW[] = {'x','m','l'};
1379 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1380 strval name;
1381 WCHAR *ptr;
1382 HRESULT hr;
1383 UINT i;
1385 hr = reader_parse_name(reader, &name);
1386 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1388 /* now that we got name check for illegal content */
1389 if (strval_eq(reader, &name, &xmlval))
1390 return WC_E_LEADINGXML;
1392 /* PITarget can't be a qualified name */
1393 ptr = reader_get_strptr(reader, &name);
1394 for (i = 0; i < name.len; i++)
1395 if (ptr[i] == ':')
1396 return i ? NC_E_NAMECOLON : WC_E_PI;
1398 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1399 *target = name;
1400 return S_OK;
1403 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1404 static HRESULT reader_parse_pi(xmlreader *reader)
1406 strval target;
1407 WCHAR *ptr;
1408 UINT start;
1409 HRESULT hr;
1411 switch (reader->resumestate)
1413 case XmlReadResumeState_Initial:
1414 /* skip '<?' */
1415 reader_skipn(reader, 2);
1416 reader_shrink(reader);
1417 reader->resumestate = XmlReadResumeState_PITarget;
1418 case XmlReadResumeState_PITarget:
1419 hr = reader_parse_pitarget(reader, &target);
1420 if (FAILED(hr)) return hr;
1421 reader_set_strvalue(reader, StringValue_LocalName, &target);
1422 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1423 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1424 reader->resumestate = XmlReadResumeState_PIBody;
1425 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1426 default:
1430 start = reader->resume[XmlReadResume_Body];
1431 ptr = reader_get_ptr(reader);
1432 while (*ptr)
1434 if (ptr[0] == '?')
1436 if (ptr[1] == '>')
1438 UINT cur = reader_get_cur(reader);
1439 strval value;
1441 /* strip all leading whitespace chars */
1442 while (start < cur)
1444 ptr = reader_get_ptr2(reader, start);
1445 if (!is_wchar_space(*ptr)) break;
1446 start++;
1449 reader_init_strvalue(start, cur-start, &value);
1451 /* skip '?>' */
1452 reader_skipn(reader, 2);
1453 TRACE("%s\n", debug_strval(reader, &value));
1454 reader->nodetype = XmlNodeType_ProcessingInstruction;
1455 reader->resumestate = XmlReadResumeState_Initial;
1456 reader->resume[XmlReadResume_Body] = 0;
1457 reader_set_strvalue(reader, StringValue_Value, &value);
1458 return S_OK;
1462 reader_skipn(reader, 1);
1463 ptr = reader_get_ptr(reader);
1466 return S_OK;
1469 /* This one is used to parse significant whitespace nodes, like in Misc production */
1470 static HRESULT reader_parse_whitespace(xmlreader *reader)
1472 switch (reader->resumestate)
1474 case XmlReadResumeState_Initial:
1475 reader_shrink(reader);
1476 reader->resumestate = XmlReadResumeState_Whitespace;
1477 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1478 reader->nodetype = XmlNodeType_Whitespace;
1479 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1480 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1481 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1482 /* fallthrough */
1483 case XmlReadResumeState_Whitespace:
1485 strval value;
1486 UINT start;
1488 reader_skipspaces(reader);
1489 if (is_reader_pending(reader)) return S_OK;
1491 start = reader->resume[XmlReadResume_Body];
1492 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1493 reader_set_strvalue(reader, StringValue_Value, &value);
1494 TRACE("%s\n", debug_strval(reader, &value));
1495 reader->resumestate = XmlReadResumeState_Initial;
1497 default:
1501 return S_OK;
1504 /* [27] Misc ::= Comment | PI | S */
1505 static HRESULT reader_parse_misc(xmlreader *reader)
1507 HRESULT hr = S_FALSE;
1509 if (reader->resumestate != XmlReadResumeState_Initial)
1511 hr = reader_more(reader);
1512 if (FAILED(hr)) return hr;
1514 /* finish current node */
1515 switch (reader->resumestate)
1517 case XmlReadResumeState_PITarget:
1518 case XmlReadResumeState_PIBody:
1519 return reader_parse_pi(reader);
1520 case XmlReadResumeState_Comment:
1521 return reader_parse_comment(reader);
1522 case XmlReadResumeState_Whitespace:
1523 return reader_parse_whitespace(reader);
1524 default:
1525 ERR("unknown resume state %d\n", reader->resumestate);
1529 while (1)
1531 const WCHAR *cur = reader_get_ptr(reader);
1533 if (is_wchar_space(*cur))
1534 hr = reader_parse_whitespace(reader);
1535 else if (!reader_cmp(reader, commentW))
1536 hr = reader_parse_comment(reader);
1537 else if (!reader_cmp(reader, piW))
1538 hr = reader_parse_pi(reader);
1539 else
1540 break;
1542 if (hr != S_FALSE) return hr;
1545 return hr;
1548 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1549 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1551 WCHAR *cur = reader_get_ptr(reader), quote;
1552 UINT start;
1554 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1556 quote = *cur;
1557 reader_skipn(reader, 1);
1559 cur = reader_get_ptr(reader);
1560 start = reader_get_cur(reader);
1561 while (is_char(*cur) && *cur != quote)
1563 reader_skipn(reader, 1);
1564 cur = reader_get_ptr(reader);
1566 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1567 if (*cur == quote) reader_skipn(reader, 1);
1569 TRACE("%s\n", debug_strval(reader, literal));
1570 return S_OK;
1573 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1574 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1575 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1577 WCHAR *cur = reader_get_ptr(reader), quote;
1578 UINT start;
1580 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1582 quote = *cur;
1583 reader_skipn(reader, 1);
1585 start = reader_get_cur(reader);
1586 cur = reader_get_ptr(reader);
1587 while (is_pubchar(*cur) && *cur != quote)
1589 reader_skipn(reader, 1);
1590 cur = reader_get_ptr(reader);
1593 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1594 TRACE("%s\n", debug_strval(reader, literal));
1595 return S_OK;
1598 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1599 static HRESULT reader_parse_externalid(xmlreader *reader)
1601 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1602 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1603 strval name;
1604 HRESULT hr;
1605 int cnt;
1607 if (reader_cmp(reader, systemW))
1609 if (reader_cmp(reader, publicW))
1610 return S_FALSE;
1611 else
1613 strval pub;
1615 /* public id */
1616 reader_skipn(reader, 6);
1617 cnt = reader_skipspaces(reader);
1618 if (!cnt) return WC_E_WHITESPACE;
1620 hr = reader_parse_pub_literal(reader, &pub);
1621 if (FAILED(hr)) return hr;
1623 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1624 return reader_add_attr(reader, &name, &pub);
1627 else
1629 strval sys;
1631 /* system id */
1632 reader_skipn(reader, 6);
1633 cnt = reader_skipspaces(reader);
1634 if (!cnt) return WC_E_WHITESPACE;
1636 hr = reader_parse_sys_literal(reader, &sys);
1637 if (FAILED(hr)) return hr;
1639 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1640 return reader_add_attr(reader, &name, &sys);
1643 return hr;
1646 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1647 static HRESULT reader_parse_dtd(xmlreader *reader)
1649 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1650 strval name;
1651 WCHAR *cur;
1652 HRESULT hr;
1654 /* check if we have "<!DOCTYPE" */
1655 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1656 reader_shrink(reader);
1658 /* DTD processing is not allowed by default */
1659 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1661 reader_skipn(reader, 9);
1662 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1664 /* name */
1665 hr = reader_parse_name(reader, &name);
1666 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1668 reader_skipspaces(reader);
1670 hr = reader_parse_externalid(reader);
1671 if (FAILED(hr)) return hr;
1673 reader_skipspaces(reader);
1675 cur = reader_get_ptr(reader);
1676 if (*cur != '>')
1678 FIXME("internal subset parsing not implemented\n");
1679 return E_NOTIMPL;
1682 /* skip '>' */
1683 reader_skipn(reader, 1);
1685 reader->nodetype = XmlNodeType_DocumentType;
1686 reader_set_strvalue(reader, StringValue_LocalName, &name);
1687 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1689 return S_OK;
1692 /* [11 NS] LocalPart ::= NCName */
1693 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1695 WCHAR *ptr;
1696 UINT start;
1698 if (reader->resume[XmlReadResume_Local])
1700 start = reader->resume[XmlReadResume_Local];
1701 ptr = reader_get_ptr(reader);
1703 else
1705 ptr = reader_get_ptr(reader);
1706 start = reader_get_cur(reader);
1709 while (is_ncnamechar(*ptr))
1711 reader_skipn(reader, 1);
1712 ptr = reader_get_ptr(reader);
1715 if (is_reader_pending(reader))
1717 reader->resume[XmlReadResume_Local] = start;
1718 return E_PENDING;
1720 else
1721 reader->resume[XmlReadResume_Local] = 0;
1723 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1725 return S_OK;
1728 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1729 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1730 [9 NS] UnprefixedName ::= LocalPart
1731 [10 NS] Prefix ::= NCName */
1732 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1734 WCHAR *ptr;
1735 UINT start;
1736 HRESULT hr;
1738 if (reader->resume[XmlReadResume_Name])
1740 start = reader->resume[XmlReadResume_Name];
1741 ptr = reader_get_ptr(reader);
1743 else
1745 ptr = reader_get_ptr(reader);
1746 start = reader_get_cur(reader);
1747 reader->resume[XmlReadResume_Name] = start;
1748 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1751 if (reader->resume[XmlReadResume_Local])
1753 hr = reader_parse_local(reader, local);
1754 if (FAILED(hr)) return hr;
1756 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1757 local->start - reader->resume[XmlReadResume_Name] - 1,
1758 prefix);
1760 else
1762 /* skip prefix part */
1763 while (is_ncnamechar(*ptr))
1765 reader_skipn(reader, 1);
1766 ptr = reader_get_ptr(reader);
1769 if (is_reader_pending(reader)) return E_PENDING;
1771 /* got a qualified name */
1772 if (*ptr == ':')
1774 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1776 /* skip ':' */
1777 reader_skipn(reader, 1);
1778 hr = reader_parse_local(reader, local);
1779 if (FAILED(hr)) return hr;
1781 else
1783 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1784 reader_init_strvalue(0, 0, prefix);
1788 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1790 if (prefix->len)
1791 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1792 else
1793 TRACE("ncname %s\n", debug_strval(reader, local));
1795 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1796 /* count ':' too */
1797 (prefix->len ? prefix->len + 1 : 0) + local->len,
1798 qname);
1800 reader->resume[XmlReadResume_Name] = 0;
1801 reader->resume[XmlReadResume_Local] = 0;
1803 return S_OK;
1806 /* Applies normalization rules to a single char, used for attribute values.
1808 Rules include 2 steps:
1810 1) replacing \r\n with a single \n;
1811 2) replacing all whitespace chars with ' '.
1814 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1816 encoded_buffer *buffer = &reader->input->buffer->utf16;
1818 if (!is_wchar_space(*ptr)) return;
1820 if (*ptr == '\r' && *(ptr+1) == '\n')
1822 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1823 memmove(ptr+1, ptr+2, len);
1825 *ptr = ' ';
1828 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1830 static const WCHAR entltW[] = {'l','t'};
1831 static const WCHAR entgtW[] = {'g','t'};
1832 static const WCHAR entampW[] = {'a','m','p'};
1833 static const WCHAR entaposW[] = {'a','p','o','s'};
1834 static const WCHAR entquotW[] = {'q','u','o','t'};
1835 static const strval lt = { (WCHAR*)entltW, 2 };
1836 static const strval gt = { (WCHAR*)entgtW, 2 };
1837 static const strval amp = { (WCHAR*)entampW, 3 };
1838 static const strval apos = { (WCHAR*)entaposW, 4 };
1839 static const strval quot = { (WCHAR*)entquotW, 4 };
1840 WCHAR *str = reader_get_strptr(reader, name);
1842 switch (*str)
1844 case 'l':
1845 if (strval_eq(reader, name, &lt)) return '<';
1846 break;
1847 case 'g':
1848 if (strval_eq(reader, name, &gt)) return '>';
1849 break;
1850 case 'a':
1851 if (strval_eq(reader, name, &amp))
1852 return '&';
1853 else if (strval_eq(reader, name, &apos))
1854 return '\'';
1855 break;
1856 case 'q':
1857 if (strval_eq(reader, name, &quot)) return '\"';
1858 break;
1859 default:
1863 return 0;
1866 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1867 [67] Reference ::= EntityRef | CharRef
1868 [68] EntityRef ::= '&' Name ';' */
1869 static HRESULT reader_parse_reference(xmlreader *reader)
1871 encoded_buffer *buffer = &reader->input->buffer->utf16;
1872 WCHAR *start = reader_get_ptr(reader), *ptr;
1873 UINT cur = reader_get_cur(reader);
1874 WCHAR ch = 0;
1875 int len;
1877 /* skip '&' */
1878 reader_skipn(reader, 1);
1879 ptr = reader_get_ptr(reader);
1881 if (*ptr == '#')
1883 reader_skipn(reader, 1);
1884 ptr = reader_get_ptr(reader);
1886 /* hex char or decimal */
1887 if (*ptr == 'x')
1889 reader_skipn(reader, 1);
1890 ptr = reader_get_ptr(reader);
1892 while (*ptr != ';')
1894 if ((*ptr >= '0' && *ptr <= '9'))
1895 ch = ch*16 + *ptr - '0';
1896 else if ((*ptr >= 'a' && *ptr <= 'f'))
1897 ch = ch*16 + *ptr - 'a' + 10;
1898 else if ((*ptr >= 'A' && *ptr <= 'F'))
1899 ch = ch*16 + *ptr - 'A' + 10;
1900 else
1901 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1902 reader_skipn(reader, 1);
1903 ptr = reader_get_ptr(reader);
1906 else
1908 while (*ptr != ';')
1910 if ((*ptr >= '0' && *ptr <= '9'))
1912 ch = ch*10 + *ptr - '0';
1913 reader_skipn(reader, 1);
1914 ptr = reader_get_ptr(reader);
1916 else
1917 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1921 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1923 /* normalize */
1924 if (is_wchar_space(ch)) ch = ' ';
1926 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1927 memmove(start+1, ptr+1, len);
1928 buffer->cur = cur + 1;
1930 *start = ch;
1932 else
1934 strval name;
1935 HRESULT hr;
1937 hr = reader_parse_name(reader, &name);
1938 if (FAILED(hr)) return hr;
1940 ptr = reader_get_ptr(reader);
1941 if (*ptr != ';') return WC_E_SEMICOLON;
1943 /* predefined entities resolve to a single character */
1944 ch = get_predefined_entity(reader, &name);
1945 if (ch)
1947 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1948 memmove(start+1, ptr+1, len);
1949 buffer->cur = cur + 1;
1951 *start = ch;
1953 else
1955 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1956 return WC_E_UNDECLAREDENTITY;
1961 return S_OK;
1964 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1965 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1967 WCHAR *ptr, quote;
1968 UINT start;
1970 ptr = reader_get_ptr(reader);
1972 /* skip opening quote */
1973 quote = *ptr;
1974 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1975 reader_skipn(reader, 1);
1977 ptr = reader_get_ptr(reader);
1978 start = reader_get_cur(reader);
1979 while (*ptr)
1981 if (*ptr == '<') return WC_E_LESSTHAN;
1983 if (*ptr == quote)
1985 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
1986 /* skip closing quote */
1987 reader_skipn(reader, 1);
1988 return S_OK;
1991 if (*ptr == '&')
1993 HRESULT hr = reader_parse_reference(reader);
1994 if (FAILED(hr)) return hr;
1996 else
1998 reader_normalize_space(reader, ptr);
1999 reader_skipn(reader, 1);
2001 ptr = reader_get_ptr(reader);
2004 return WC_E_QUOTE;
2007 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2008 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2009 [3 NS] DefaultAttName ::= 'xmlns'
2010 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2011 static HRESULT reader_parse_attribute(xmlreader *reader)
2013 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
2014 strval prefix, local, qname, xmlns, value;
2015 HRESULT hr;
2017 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2018 if (FAILED(hr)) return hr;
2020 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
2022 if (strval_eq(reader, &prefix, &xmlns))
2024 FIXME("namespace definitions not supported\n");
2025 return E_NOTIMPL;
2028 if (strval_eq(reader, &qname, &xmlns))
2029 FIXME("default namespace definitions not supported\n");
2031 hr = reader_parse_eq(reader);
2032 if (FAILED(hr)) return hr;
2034 hr = reader_parse_attvalue(reader, &value);
2035 if (FAILED(hr)) return hr;
2037 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2038 return reader_add_attr(reader, &local, &value);
2041 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2042 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2043 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2045 HRESULT hr;
2047 hr = reader_parse_qname(reader, prefix, local, qname);
2048 if (FAILED(hr)) return hr;
2050 while (1)
2052 static const WCHAR endW[] = {'/','>',0};
2054 reader_skipspaces(reader);
2056 /* empty element */
2057 if ((*empty = !reader_cmp(reader, endW)))
2059 /* skip '/>' */
2060 reader_skipn(reader, 2);
2061 reader->empty_element = TRUE;
2062 return S_OK;
2065 /* got a start tag */
2066 if (!reader_cmp(reader, gtW))
2068 /* skip '>' */
2069 reader_skipn(reader, 1);
2070 return reader_push_element(reader, qname, local);
2073 hr = reader_parse_attribute(reader);
2074 if (FAILED(hr)) return hr;
2077 return S_OK;
2080 /* [39] element ::= EmptyElemTag | STag content ETag */
2081 static HRESULT reader_parse_element(xmlreader *reader)
2083 HRESULT hr;
2085 switch (reader->resumestate)
2087 case XmlReadResumeState_Initial:
2088 /* check if we are really on element */
2089 if (reader_cmp(reader, ltW)) return S_FALSE;
2091 /* skip '<' */
2092 reader_skipn(reader, 1);
2094 reader_shrink(reader);
2095 reader->resumestate = XmlReadResumeState_STag;
2096 case XmlReadResumeState_STag:
2098 strval qname, prefix, local;
2099 int empty = 0;
2101 /* this handles empty elements too */
2102 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2103 if (FAILED(hr)) return hr;
2105 /* FIXME: need to check for defined namespace to reject invalid prefix,
2106 currently reject all prefixes */
2107 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2109 /* if we got empty element and stack is empty go straight to Misc */
2110 if (empty && list_empty(&reader->elements))
2111 reader->instate = XmlReadInState_MiscEnd;
2112 else
2113 reader->instate = XmlReadInState_Content;
2115 reader->nodetype = XmlNodeType_Element;
2116 reader->resumestate = XmlReadResumeState_Initial;
2117 reader_set_strvalue(reader, StringValue_LocalName, &local);
2118 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2119 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2120 break;
2122 default:
2123 hr = E_FAIL;
2126 return hr;
2129 /* [13 NS] ETag ::= '</' QName S? '>' */
2130 static HRESULT reader_parse_endtag(xmlreader *reader)
2132 strval prefix, local, qname;
2133 struct element *elem;
2134 HRESULT hr;
2136 /* skip '</' */
2137 reader_skipn(reader, 2);
2139 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2140 if (FAILED(hr)) return hr;
2142 reader_skipspaces(reader);
2144 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2146 /* skip '>' */
2147 reader_skipn(reader, 1);
2149 /* Element stack should never be empty at this point, cause we shouldn't get to
2150 content parsing if it's empty. */
2151 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2152 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2154 reader_pop_element(reader);
2156 /* It was a root element, the rest is expected as Misc */
2157 if (list_empty(&reader->elements))
2158 reader->instate = XmlReadInState_MiscEnd;
2160 reader->nodetype = XmlNodeType_EndElement;
2161 reader_set_strvalue(reader, StringValue_LocalName, &local);
2162 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2164 return S_OK;
2167 /* [18] CDSect ::= CDStart CData CDEnd
2168 [19] CDStart ::= '<![CDATA['
2169 [20] CData ::= (Char* - (Char* ']]>' Char*))
2170 [21] CDEnd ::= ']]>' */
2171 static HRESULT reader_parse_cdata(xmlreader *reader)
2173 WCHAR *ptr;
2174 UINT start;
2176 if (reader->resumestate == XmlReadResumeState_CDATA)
2178 start = reader->resume[XmlReadResume_Body];
2179 ptr = reader_get_ptr(reader);
2181 else
2183 /* skip markup '<![CDATA[' */
2184 reader_skipn(reader, 9);
2185 reader_shrink(reader);
2186 ptr = reader_get_ptr(reader);
2187 start = reader_get_cur(reader);
2188 reader->nodetype = XmlNodeType_CDATA;
2189 reader->resume[XmlReadResume_Body] = start;
2190 reader->resumestate = XmlReadResumeState_CDATA;
2191 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2192 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2193 reader_set_strvalue(reader, StringValue_Value, NULL);
2196 while (*ptr)
2198 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2200 strval value;
2202 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2204 /* skip ']]>' */
2205 reader_skipn(reader, 3);
2206 TRACE("%s\n", debug_strval(reader, &value));
2208 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2209 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2210 reader_set_strvalue(reader, StringValue_Value, &value);
2211 reader->resume[XmlReadResume_Body] = 0;
2212 reader->resumestate = XmlReadResumeState_Initial;
2213 return S_OK;
2215 else
2217 /* Value normalization is not fully implemented, rules are:
2219 - single '\r' -> '\n';
2220 - sequence '\r\n' -> '\n', in this case value length changes;
2222 if (*ptr == '\r') *ptr = '\n';
2223 reader_skipn(reader, 1);
2224 ptr++;
2228 return S_OK;
2231 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2232 static HRESULT reader_parse_chardata(xmlreader *reader)
2234 WCHAR *ptr;
2235 UINT start;
2237 if (reader->resumestate == XmlReadResumeState_CharData)
2239 start = reader->resume[XmlReadResume_Body];
2240 ptr = reader_get_ptr(reader);
2242 else
2244 reader_shrink(reader);
2245 ptr = reader_get_ptr(reader);
2246 start = reader_get_cur(reader);
2247 /* There's no text */
2248 if (!*ptr || *ptr == '<') return S_OK;
2249 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2250 reader->resume[XmlReadResume_Body] = start;
2251 reader->resumestate = XmlReadResumeState_CharData;
2252 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2253 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2254 reader_set_strvalue(reader, StringValue_Value, NULL);
2257 while (*ptr)
2259 /* CDATA closing sequence ']]>' is not allowed */
2260 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2261 return WC_E_CDSECTEND;
2263 /* Found next markup part */
2264 if (ptr[0] == '<')
2266 strval value;
2268 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2269 reader_set_strvalue(reader, StringValue_Value, &value);
2270 reader->resume[XmlReadResume_Body] = 0;
2271 reader->resumestate = XmlReadResumeState_Initial;
2272 return S_OK;
2275 reader_skipn(reader, 1);
2277 /* this covers a case when text has leading whitespace chars */
2278 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2279 ptr++;
2282 return S_OK;
2285 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2286 static HRESULT reader_parse_content(xmlreader *reader)
2288 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2289 static const WCHAR etagW[] = {'<','/',0};
2290 static const WCHAR ampW[] = {'&',0};
2292 if (reader->resumestate != XmlReadResumeState_Initial)
2294 switch (reader->resumestate)
2296 case XmlReadResumeState_CDATA:
2297 return reader_parse_cdata(reader);
2298 case XmlReadResumeState_Comment:
2299 return reader_parse_comment(reader);
2300 case XmlReadResumeState_PIBody:
2301 case XmlReadResumeState_PITarget:
2302 return reader_parse_pi(reader);
2303 case XmlReadResumeState_CharData:
2304 return reader_parse_chardata(reader);
2305 default:
2306 ERR("unknown resume state %d\n", reader->resumestate);
2310 reader_shrink(reader);
2312 /* handle end tag here, it indicates end of content as well */
2313 if (!reader_cmp(reader, etagW))
2314 return reader_parse_endtag(reader);
2316 if (!reader_cmp(reader, commentW))
2317 return reader_parse_comment(reader);
2319 if (!reader_cmp(reader, piW))
2320 return reader_parse_pi(reader);
2322 if (!reader_cmp(reader, cdstartW))
2323 return reader_parse_cdata(reader);
2325 if (!reader_cmp(reader, ampW))
2326 return reader_parse_reference(reader);
2328 if (!reader_cmp(reader, ltW))
2329 return reader_parse_element(reader);
2331 /* what's left must be CharData */
2332 return reader_parse_chardata(reader);
2335 static HRESULT reader_parse_nextnode(xmlreader *reader)
2337 HRESULT hr;
2339 if (!is_reader_pending(reader))
2340 reader_clear_attrs(reader);
2342 while (1)
2344 switch (reader->instate)
2346 /* if it's a first call for a new input we need to detect stream encoding */
2347 case XmlReadInState_Initial:
2349 xml_encoding enc;
2351 hr = readerinput_growraw(reader->input);
2352 if (FAILED(hr)) return hr;
2354 /* try to detect encoding by BOM or data and set input code page */
2355 hr = readerinput_detectencoding(reader->input, &enc);
2356 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2357 if (FAILED(hr)) return hr;
2359 /* always switch first time cause we have to put something in */
2360 readerinput_switchencoding(reader->input, enc);
2362 /* parse xml declaration */
2363 hr = reader_parse_xmldecl(reader);
2364 if (FAILED(hr)) return hr;
2366 readerinput_shrinkraw(reader->input, -1);
2367 reader->instate = XmlReadInState_Misc_DTD;
2368 if (hr == S_OK) return hr;
2370 break;
2371 case XmlReadInState_Misc_DTD:
2372 hr = reader_parse_misc(reader);
2373 if (FAILED(hr)) return hr;
2375 if (hr == S_FALSE)
2376 reader->instate = XmlReadInState_DTD;
2377 else
2378 return hr;
2379 break;
2380 case XmlReadInState_DTD:
2381 hr = reader_parse_dtd(reader);
2382 if (FAILED(hr)) return hr;
2384 if (hr == S_OK)
2386 reader->instate = XmlReadInState_DTD_Misc;
2387 return hr;
2389 else
2390 reader->instate = XmlReadInState_Element;
2391 break;
2392 case XmlReadInState_DTD_Misc:
2393 hr = reader_parse_misc(reader);
2394 if (FAILED(hr)) return hr;
2396 if (hr == S_FALSE)
2397 reader->instate = XmlReadInState_Element;
2398 else
2399 return hr;
2400 break;
2401 case XmlReadInState_Element:
2402 return reader_parse_element(reader);
2403 case XmlReadInState_Content:
2404 return reader_parse_content(reader);
2405 case XmlReadInState_MiscEnd:
2406 hr = reader_parse_misc(reader);
2407 if (FAILED(hr)) return hr;
2409 if (hr == S_FALSE)
2410 reader->instate = XmlReadInState_Eof;
2411 return hr;
2412 case XmlReadInState_Eof:
2413 return S_FALSE;
2414 default:
2415 FIXME("internal state %d not handled\n", reader->instate);
2416 return E_NOTIMPL;
2420 return E_NOTIMPL;
2423 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2425 xmlreader *This = impl_from_IXmlReader(iface);
2427 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2429 if (IsEqualGUID(riid, &IID_IUnknown) ||
2430 IsEqualGUID(riid, &IID_IXmlReader))
2432 *ppvObject = iface;
2434 else
2436 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2437 *ppvObject = NULL;
2438 return E_NOINTERFACE;
2441 IXmlReader_AddRef(iface);
2443 return S_OK;
2446 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2448 xmlreader *This = impl_from_IXmlReader(iface);
2449 ULONG ref = InterlockedIncrement(&This->ref);
2450 TRACE("(%p)->(%d)\n", This, ref);
2451 return ref;
2454 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2456 xmlreader *This = impl_from_IXmlReader(iface);
2457 LONG ref = InterlockedDecrement(&This->ref);
2459 TRACE("(%p)->(%d)\n", This, ref);
2461 if (ref == 0)
2463 IMalloc *imalloc = This->imalloc;
2464 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2465 reader_clear_attrs(This);
2466 reader_clear_elements(This);
2467 reader_free_strvalues(This);
2468 reader_free(This, This);
2469 if (imalloc) IMalloc_Release(imalloc);
2472 return ref;
2475 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2477 xmlreader *This = impl_from_IXmlReader(iface);
2478 IXmlReaderInput *readerinput;
2479 HRESULT hr;
2481 TRACE("(%p)->(%p)\n", This, input);
2483 if (This->input)
2485 readerinput_release_stream(This->input);
2486 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2487 This->input = NULL;
2490 This->line = This->pos = 0;
2491 reader_clear_elements(This);
2492 This->depth = 0;
2493 This->resumestate = XmlReadResumeState_Initial;
2494 memset(This->resume, 0, sizeof(This->resume));
2496 /* just reset current input */
2497 if (!input)
2499 This->state = XmlReadState_Initial;
2500 return S_OK;
2503 /* now try IXmlReaderInput, ISequentialStream, IStream */
2504 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2505 if (hr == S_OK)
2507 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2508 This->input = impl_from_IXmlReaderInput(readerinput);
2509 else
2511 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2512 readerinput, readerinput->lpVtbl);
2513 IUnknown_Release(readerinput);
2514 return E_FAIL;
2519 if (hr != S_OK || !readerinput)
2521 /* create IXmlReaderInput basing on supplied interface */
2522 hr = CreateXmlReaderInputWithEncodingName(input,
2523 This->imalloc, NULL, FALSE, NULL, &readerinput);
2524 if (hr != S_OK) return hr;
2525 This->input = impl_from_IXmlReaderInput(readerinput);
2528 /* set stream for supplied IXmlReaderInput */
2529 hr = readerinput_query_for_stream(This->input);
2530 if (hr == S_OK)
2532 This->state = XmlReadState_Initial;
2533 This->instate = XmlReadInState_Initial;
2536 return hr;
2539 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2541 xmlreader *This = impl_from_IXmlReader(iface);
2543 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2545 if (!value) return E_INVALIDARG;
2547 switch (property)
2549 case XmlReaderProperty_DtdProcessing:
2550 *value = This->dtdmode;
2551 break;
2552 case XmlReaderProperty_ReadState:
2553 *value = This->state;
2554 break;
2555 default:
2556 FIXME("Unimplemented property (%u)\n", property);
2557 return E_NOTIMPL;
2560 return S_OK;
2563 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2565 xmlreader *This = impl_from_IXmlReader(iface);
2567 TRACE("(%p)->(%s %lu)\n", This, debugstr_reader_prop(property), value);
2569 switch (property)
2571 case XmlReaderProperty_DtdProcessing:
2572 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2573 This->dtdmode = value;
2574 break;
2575 default:
2576 FIXME("Unimplemented property (%u)\n", property);
2577 return E_NOTIMPL;
2580 return S_OK;
2583 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2585 xmlreader *This = impl_from_IXmlReader(iface);
2586 XmlNodeType oldtype = This->nodetype;
2587 HRESULT hr;
2589 TRACE("(%p)->(%p)\n", This, nodetype);
2591 if (This->state == XmlReadState_Closed) return S_FALSE;
2593 hr = reader_parse_nextnode(This);
2594 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2595 This->state = XmlReadState_Interactive;
2596 if (hr == S_OK)
2598 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2599 *nodetype = This->nodetype;
2602 return hr;
2605 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2607 xmlreader *This = impl_from_IXmlReader(iface);
2608 TRACE("(%p)->(%p)\n", This, node_type);
2610 *node_type = reader_get_nodetype(This);
2611 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2614 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2616 xmlreader *This = impl_from_IXmlReader(iface);
2618 TRACE("(%p)\n", This);
2620 if (!This->attr_count) return S_FALSE;
2621 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2622 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2623 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2625 return S_OK;
2628 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2630 xmlreader *This = impl_from_IXmlReader(iface);
2631 const struct list *next;
2633 TRACE("(%p)\n", This);
2635 if (!This->attr_count) return S_FALSE;
2637 if (!This->attr)
2638 return IXmlReader_MoveToFirstAttribute(iface);
2640 next = list_next(&This->attrs, &This->attr->entry);
2641 if (next)
2643 This->attr = LIST_ENTRY(next, struct attribute, entry);
2644 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2645 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2648 return next ? S_OK : S_FALSE;
2651 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2652 LPCWSTR local_name,
2653 LPCWSTR namespaceUri)
2655 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2656 return E_NOTIMPL;
2659 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2661 xmlreader *This = impl_from_IXmlReader(iface);
2662 struct element *elem;
2664 TRACE("(%p)\n", This);
2666 if (!This->attr_count) return S_FALSE;
2667 This->attr = NULL;
2669 /* FIXME: support other node types with 'attributes' like DTD */
2670 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2671 if (elem)
2673 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2674 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2677 return S_OK;
2680 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2682 xmlreader *This = impl_from_IXmlReader(iface);
2684 TRACE("(%p)->(%p %p)\n", This, name, len);
2685 *name = This->strvalues[StringValue_QualifiedName].str;
2686 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2687 return S_OK;
2690 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2691 LPCWSTR *namespaceUri,
2692 UINT *namespaceUri_length)
2694 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2695 return E_NOTIMPL;
2698 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2700 xmlreader *This = impl_from_IXmlReader(iface);
2702 TRACE("(%p)->(%p %p)\n", This, name, len);
2703 *name = This->strvalues[StringValue_LocalName].str;
2704 if (len) *len = This->strvalues[StringValue_LocalName].len;
2705 return S_OK;
2708 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2710 xmlreader *This = impl_from_IXmlReader(iface);
2712 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2713 *prefix = This->strvalues[StringValue_Prefix].str;
2714 if (len) *len = This->strvalues[StringValue_Prefix].len;
2715 return S_OK;
2718 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2720 xmlreader *reader = impl_from_IXmlReader(iface);
2721 strval *val = &reader->strvalues[StringValue_Value];
2723 TRACE("(%p)->(%p %p)\n", reader, value, len);
2725 *value = NULL;
2727 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2729 XmlNodeType type;
2730 HRESULT hr;
2732 hr = IXmlReader_Read(iface, &type);
2733 if (FAILED(hr)) return hr;
2735 /* return if still pending, partially read values are not reported */
2736 if (is_reader_pending(reader)) return E_PENDING;
2739 if (!val->str)
2741 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2742 if (!ptr) return E_OUTOFMEMORY;
2743 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2744 ptr[val->len] = 0;
2745 val->str = ptr;
2748 *value = val->str;
2749 if (len) *len = val->len;
2750 return S_OK;
2753 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2755 xmlreader *reader = impl_from_IXmlReader(iface);
2756 strval *val = &reader->strvalues[StringValue_Value];
2757 UINT len;
2759 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2761 /* Value is already allocated, chunked reads are not possible. */
2762 if (val->str) return S_FALSE;
2764 if (val->len)
2766 len = min(chunk_size, val->len);
2767 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2768 val->start += len;
2769 val->len -= len;
2770 if (read) *read = len;
2773 return S_OK;
2776 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2777 LPCWSTR *baseUri,
2778 UINT *baseUri_length)
2780 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2781 return E_NOTIMPL;
2784 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2786 FIXME("(%p): stub\n", iface);
2787 return FALSE;
2790 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2792 xmlreader *This = impl_from_IXmlReader(iface);
2793 TRACE("(%p)\n", This);
2794 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2795 when current node is start tag of an element */
2796 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2799 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2801 xmlreader *This = impl_from_IXmlReader(iface);
2803 TRACE("(%p %p)\n", This, lineNumber);
2805 if (!lineNumber) return E_INVALIDARG;
2807 *lineNumber = This->line;
2809 return S_OK;
2812 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2814 xmlreader *This = impl_from_IXmlReader(iface);
2816 TRACE("(%p %p)\n", This, linePosition);
2818 if (!linePosition) return E_INVALIDARG;
2820 *linePosition = This->pos;
2822 return S_OK;
2825 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2827 xmlreader *This = impl_from_IXmlReader(iface);
2829 TRACE("(%p)->(%p)\n", This, count);
2831 if (!count) return E_INVALIDARG;
2833 *count = This->attr_count;
2834 return S_OK;
2837 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2839 xmlreader *This = impl_from_IXmlReader(iface);
2840 TRACE("(%p)->(%p)\n", This, depth);
2841 *depth = This->depth;
2842 return S_OK;
2845 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2847 FIXME("(%p): stub\n", iface);
2848 return E_NOTIMPL;
2851 static const struct IXmlReaderVtbl xmlreader_vtbl =
2853 xmlreader_QueryInterface,
2854 xmlreader_AddRef,
2855 xmlreader_Release,
2856 xmlreader_SetInput,
2857 xmlreader_GetProperty,
2858 xmlreader_SetProperty,
2859 xmlreader_Read,
2860 xmlreader_GetNodeType,
2861 xmlreader_MoveToFirstAttribute,
2862 xmlreader_MoveToNextAttribute,
2863 xmlreader_MoveToAttributeByName,
2864 xmlreader_MoveToElement,
2865 xmlreader_GetQualifiedName,
2866 xmlreader_GetNamespaceUri,
2867 xmlreader_GetLocalName,
2868 xmlreader_GetPrefix,
2869 xmlreader_GetValue,
2870 xmlreader_ReadValueChunk,
2871 xmlreader_GetBaseUri,
2872 xmlreader_IsDefault,
2873 xmlreader_IsEmptyElement,
2874 xmlreader_GetLineNumber,
2875 xmlreader_GetLinePosition,
2876 xmlreader_GetAttributeCount,
2877 xmlreader_GetDepth,
2878 xmlreader_IsEOF
2881 /** IXmlReaderInput **/
2882 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2884 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2886 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2888 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2889 IsEqualGUID(riid, &IID_IUnknown))
2891 *ppvObject = iface;
2893 else
2895 WARN("interface %s not implemented\n", debugstr_guid(riid));
2896 *ppvObject = NULL;
2897 return E_NOINTERFACE;
2900 IUnknown_AddRef(iface);
2902 return S_OK;
2905 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2907 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2908 ULONG ref = InterlockedIncrement(&This->ref);
2909 TRACE("(%p)->(%d)\n", This, ref);
2910 return ref;
2913 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2915 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2916 LONG ref = InterlockedDecrement(&This->ref);
2918 TRACE("(%p)->(%d)\n", This, ref);
2920 if (ref == 0)
2922 IMalloc *imalloc = This->imalloc;
2923 if (This->input) IUnknown_Release(This->input);
2924 if (This->stream) ISequentialStream_Release(This->stream);
2925 if (This->buffer) free_input_buffer(This->buffer);
2926 readerinput_free(This, This->baseuri);
2927 readerinput_free(This, This);
2928 if (imalloc) IMalloc_Release(imalloc);
2931 return ref;
2934 static const struct IUnknownVtbl xmlreaderinputvtbl =
2936 xmlreaderinput_QueryInterface,
2937 xmlreaderinput_AddRef,
2938 xmlreaderinput_Release
2941 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2943 xmlreader *reader;
2944 int i;
2946 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2948 if (!IsEqualGUID(riid, &IID_IXmlReader))
2950 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2951 return E_FAIL;
2954 if (imalloc)
2955 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2956 else
2957 reader = heap_alloc(sizeof(*reader));
2958 if(!reader) return E_OUTOFMEMORY;
2960 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2961 reader->ref = 1;
2962 reader->input = NULL;
2963 reader->state = XmlReadState_Closed;
2964 reader->instate = XmlReadInState_Initial;
2965 reader->resumestate = XmlReadResumeState_Initial;
2966 reader->dtdmode = DtdProcessing_Prohibit;
2967 reader->line = reader->pos = 0;
2968 reader->imalloc = imalloc;
2969 if (imalloc) IMalloc_AddRef(imalloc);
2970 reader->nodetype = XmlNodeType_None;
2971 list_init(&reader->attrs);
2972 reader->attr_count = 0;
2973 reader->attr = NULL;
2974 list_init(&reader->elements);
2975 reader->depth = 0;
2976 reader->max_depth = 256;
2977 reader->empty_element = FALSE;
2978 memset(reader->resume, 0, sizeof(reader->resume));
2980 for (i = 0; i < StringValue_Last; i++)
2981 reader->strvalues[i] = strval_empty;
2983 *obj = &reader->IXmlReader_iface;
2985 TRACE("returning iface %p\n", *obj);
2987 return S_OK;
2990 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2991 IMalloc *imalloc,
2992 LPCWSTR encoding,
2993 BOOL hint,
2994 LPCWSTR base_uri,
2995 IXmlReaderInput **ppInput)
2997 xmlreaderinput *readerinput;
2998 HRESULT hr;
3000 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3001 hint, wine_dbgstr_w(base_uri), ppInput);
3003 if (!stream || !ppInput) return E_INVALIDARG;
3005 if (imalloc)
3006 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3007 else
3008 readerinput = heap_alloc(sizeof(*readerinput));
3009 if(!readerinput) return E_OUTOFMEMORY;
3011 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3012 readerinput->ref = 1;
3013 readerinput->imalloc = imalloc;
3014 readerinput->stream = NULL;
3015 if (imalloc) IMalloc_AddRef(imalloc);
3016 readerinput->encoding = parse_encoding_name(encoding, -1);
3017 readerinput->hint = hint;
3018 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3019 readerinput->pending = 0;
3021 hr = alloc_input_buffer(readerinput);
3022 if (hr != S_OK)
3024 readerinput_free(readerinput, readerinput->baseuri);
3025 readerinput_free(readerinput, readerinput);
3026 if (imalloc) IMalloc_Release(imalloc);
3027 return hr;
3029 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3031 *ppInput = &readerinput->IXmlReaderInput_iface;
3033 TRACE("returning iface %p\n", *ppInput);
3035 return S_OK;