xmllite: Use BOOL type where appropriate.
[wine.git] / dlls / xmllite / reader.c
bloba216951dcb31e24c30fc5e8b043a4e958b697a67
/*
 * IXmlReader implementation
 *
 * Copyright 2010, 2012-2013 Nikolay Sivov
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Encodings known to the reader. The values are also used as indices
   into xml_encoding_map, see get_code_page(). */
typedef enum
{
    XmlEncoding_UTF16,
    XmlEncoding_UTF8,
    XmlEncoding_Unknown
} xml_encoding;
/* Internal parser state, kept in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,   /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records which construct was being parsed when parsing got interrupted
   by an input problem. The reader uses it to resume at the right place. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData
} XmlReaderResumeState;
/* Indices into xmlreader.resume[], the saved input offsets used to resume
   parsing from a particular input position. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
/* Indices into xmlreader.strvalues[]; StringValue_Last is the array size. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
92 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
93 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
95 static const WCHAR dblquoteW[] = {'\"',0};
96 static const WCHAR quoteW[] = {'\'',0};
97 static const WCHAR ltW[] = {'<',0};
98 static const WCHAR gtW[] = {'>',0};
99 static const WCHAR commentW[] = {'<','!','-','-',0};
100 static const WCHAR piW[] = {'<','?',0};
102 static const char *debugstr_nodetype(XmlNodeType nodetype)
104 static const char* type_names[] =
106 "None",
107 "Element",
108 "Attribute",
109 "Text",
110 "CDATA",
113 "ProcessingInstruction",
114 "Comment",
116 "DocumentType",
119 "Whitespace",
121 "EndElement",
123 "XmlDeclaration"
126 if (nodetype > _XmlNodeType_Last)
128 static char buf[25];
129 sprintf(buf, "unknown type=%d", nodetype);
130 return buf;
132 return type_names[nodetype];
135 static const char *debugstr_prop(XmlReaderProperty prop)
137 static const char* prop_names[] =
139 "MultiLanguage",
140 "ConformanceLevel",
141 "RandomAccess",
142 "XmlResolver",
143 "DtdProcessing",
144 "ReadState",
145 "MaxElementDepth",
146 "MaxEntityExpansion"
149 if (prop > _XmlReaderProperty_Last)
151 static char buf[25];
152 sprintf(buf, "unknown property=%d", prop);
153 return buf;
155 return prop_names[prop];
158 struct xml_encoding_data
160 const WCHAR *name;
161 xml_encoding enc;
162 UINT cp;
165 static const struct xml_encoding_data xml_encoding_map[] = {
166 { utf16W, XmlEncoding_UTF16, ~0 },
167 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
170 typedef struct
172 char *data;
173 UINT cur;
174 unsigned int allocated;
175 unsigned int written;
176 } encoded_buffer;
178 typedef struct input_buffer input_buffer;
180 typedef struct
182 IXmlReaderInput IXmlReaderInput_iface;
183 LONG ref;
184 /* reference passed on IXmlReaderInput creation, is kept when input is created */
185 IUnknown *input;
186 IMalloc *imalloc;
187 xml_encoding encoding;
188 BOOL hint;
189 WCHAR *baseuri;
190 /* stream reference set after SetInput() call from reader,
191 stored as sequential stream, cause currently
192 optimizations possible with IStream aren't implemented */
193 ISequentialStream *stream;
194 input_buffer *buffer;
195 unsigned int pending : 1;
196 } xmlreaderinput;
198 static const struct IUnknownVtbl xmlreaderinputvtbl;
200 /* Structure to hold parsed string of specific length.
202 Reader stores node value as 'start' pointer, on request
203 a null-terminated version of it is allocated.
205 To init a strval variable use reader_init_strval(),
206 to set strval as a reader value use reader_set_strval().
208 typedef struct
210 WCHAR *str; /* allocated null-terminated string */
211 UINT len; /* length in WCHARs, altered after ReadValueChunk */
212 UINT start; /* input position where value starts */
213 } strval;
215 static WCHAR emptyW[] = {0};
216 static const strval strval_empty = { emptyW };
218 struct attribute
220 struct list entry;
221 strval localname;
222 strval value;
225 struct element
227 struct list entry;
228 strval qname;
229 strval localname;
232 typedef struct
234 IXmlReader IXmlReader_iface;
235 LONG ref;
236 xmlreaderinput *input;
237 IMalloc *imalloc;
238 XmlReadState state;
239 XmlReaderInternalState instate;
240 XmlReaderResumeState resumestate;
241 XmlNodeType nodetype;
242 DtdProcessing dtdmode;
243 UINT line, pos; /* reader position in XML stream */
244 struct list attrs; /* attributes list for current node */
245 struct attribute *attr; /* current attribute */
246 UINT attr_count;
247 struct list elements;
248 strval strvalues[StringValue_Last];
249 UINT depth;
250 UINT max_depth;
251 BOOL empty_element;
252 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
253 } xmlreader;
255 struct input_buffer
257 encoded_buffer utf16;
258 encoded_buffer encoded;
259 UINT code_page;
260 xmlreaderinput *input;
263 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
265 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
268 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
270 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
273 static inline void *m_alloc(IMalloc *imalloc, size_t len)
275 if (imalloc)
276 return IMalloc_Alloc(imalloc, len);
277 else
278 return heap_alloc(len);
281 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
283 if (imalloc)
284 return IMalloc_Realloc(imalloc, mem, len);
285 else
286 return heap_realloc(mem, len);
289 static inline void m_free(IMalloc *imalloc, void *mem)
291 if (imalloc)
292 IMalloc_Free(imalloc, mem);
293 else
294 heap_free(mem);
297 /* reader memory allocation functions */
298 static inline void *reader_alloc(xmlreader *reader, size_t len)
300 return m_alloc(reader->imalloc, len);
303 static inline void reader_free(xmlreader *reader, void *mem)
305 m_free(reader->imalloc, mem);
308 /* Just return pointer from offset, no attempt to read more. */
309 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
311 encoded_buffer *buffer = &reader->input->buffer->utf16;
312 return (WCHAR*)buffer->data + offset;
315 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
317 return v->str ? v->str : reader_get_ptr2(reader, v->start);
320 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
322 *dest = *src;
324 if (src->str != strval_empty.str)
326 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
327 if (!dest->str) return E_OUTOFMEMORY;
328 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
329 dest->str[dest->len] = 0;
330 dest->start = 0;
333 return S_OK;
336 /* reader input memory allocation functions */
337 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
339 return m_alloc(input->imalloc, len);
342 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
344 return m_realloc(input->imalloc, mem, len);
347 static inline void readerinput_free(xmlreaderinput *input, void *mem)
349 m_free(input->imalloc, mem);
352 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
354 LPWSTR ret = NULL;
356 if(str) {
357 DWORD size;
359 size = (strlenW(str)+1)*sizeof(WCHAR);
360 ret = readerinput_alloc(input, size);
361 if (ret) memcpy(ret, str, size);
364 return ret;
367 static void reader_clear_attrs(xmlreader *reader)
369 struct attribute *attr, *attr2;
370 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
372 reader_free(reader, attr);
374 list_init(&reader->attrs);
375 reader->attr_count = 0;
378 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
379 while we are on a node with attributes */
380 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
382 struct attribute *attr;
384 attr = reader_alloc(reader, sizeof(*attr));
385 if (!attr) return E_OUTOFMEMORY;
387 attr->localname = *localname;
388 attr->value = *value;
389 list_add_tail(&reader->attrs, &attr->entry);
390 reader->attr_count++;
392 return S_OK;
395 /* This one frees stored string value if needed */
396 static void reader_free_strvalued(xmlreader *reader, strval *v)
398 if (v->str != strval_empty.str)
400 reader_free(reader, v->str);
401 *v = strval_empty;
405 /* returns length in WCHARs from 'start' to current buffer offset */
406 static inline UINT reader_get_len(const xmlreader *reader, UINT start)
408 return reader->input->buffer->utf16.cur - start;
411 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
413 v->start = start;
414 v->len = len;
415 v->str = NULL;
418 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
420 return debugstr_wn(reader_get_strptr(reader, v), v->len);
423 /* used to initalize from constant string */
424 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
426 v->start = 0;
427 v->len = len;
428 v->str = str;
431 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
433 reader_free_strvalued(reader, &reader->strvalues[type]);
436 static void reader_free_strvalues(xmlreader *reader)
438 int type;
439 for (type = 0; type < StringValue_Last; type++)
440 reader_free_strvalue(reader, type);
443 /* This helper should only be used to test if strings are the same,
444 it doesn't try to sort. */
445 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
447 if (str1->len != str2->len) return 0;
448 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
451 static void reader_clear_elements(xmlreader *reader)
453 struct element *elem, *elem2;
454 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
456 reader_free_strvalued(reader, &elem->qname);
457 reader_free(reader, elem);
459 list_init(&reader->elements);
460 reader->empty_element = FALSE;
463 static HRESULT reader_inc_depth(xmlreader *reader)
465 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
466 return S_OK;
469 static void reader_dec_depth(xmlreader *reader)
471 if (reader->depth > 1) reader->depth--;
474 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
476 struct element *elem;
477 HRESULT hr;
479 elem = reader_alloc(reader, sizeof(*elem));
480 if (!elem) return E_OUTOFMEMORY;
482 hr = reader_strvaldup(reader, qname, &elem->qname);
483 if (FAILED(hr)) {
484 reader_free(reader, elem);
485 return hr;
488 hr = reader_strvaldup(reader, localname, &elem->localname);
489 if (FAILED(hr))
491 reader_free_strvalued(reader, &elem->qname);
492 reader_free(reader, elem);
493 return hr;
496 if (!list_empty(&reader->elements))
498 hr = reader_inc_depth(reader);
499 if (FAILED(hr)) {
500 reader_free(reader, elem);
501 return hr;
505 list_add_head(&reader->elements, &elem->entry);
506 reader->empty_element = FALSE;
507 return hr;
510 static void reader_pop_element(xmlreader *reader)
512 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
514 if (elem)
516 list_remove(&elem->entry);
517 reader_free_strvalued(reader, &elem->qname);
518 reader_free_strvalued(reader, &elem->localname);
519 reader_free(reader, elem);
520 reader_dec_depth(reader);
524 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
525 means node value is to be determined. */
526 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
528 strval *v = &reader->strvalues[type];
530 reader_free_strvalue(reader, type);
531 if (!value)
533 v->str = NULL;
534 v->start = 0;
535 v->len = 0;
536 return;
539 if (value->str == strval_empty.str)
540 *v = *value;
541 else
543 if (type == StringValue_Value)
545 /* defer allocation for value string */
546 v->str = NULL;
547 v->start = value->start;
548 v->len = value->len;
550 else
552 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
553 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
554 v->str[value->len] = 0;
555 v->len = value->len;
560 static inline int is_reader_pending(xmlreader *reader)
562 return reader->input->pending;
565 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
567 const int initial_len = 0x2000;
568 buffer->data = readerinput_alloc(input, initial_len);
569 if (!buffer->data) return E_OUTOFMEMORY;
571 memset(buffer->data, 0, 4);
572 buffer->cur = 0;
573 buffer->allocated = initial_len;
574 buffer->written = 0;
576 return S_OK;
579 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
581 readerinput_free(input, buffer->data);
584 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
586 if (encoding == XmlEncoding_Unknown)
588 FIXME("unsupported encoding %d\n", encoding);
589 return E_NOTIMPL;
592 *cp = xml_encoding_map[encoding].cp;
594 return S_OK;
597 static xml_encoding parse_encoding_name(const WCHAR *name, int len)
599 int min, max, n, c;
601 if (!name) return XmlEncoding_Unknown;
603 min = 0;
604 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
606 while (min <= max)
608 n = (min+max)/2;
610 if (len != -1)
611 c = strncmpiW(xml_encoding_map[n].name, name, len);
612 else
613 c = strcmpiW(xml_encoding_map[n].name, name);
614 if (!c)
615 return xml_encoding_map[n].enc;
617 if (c > 0)
618 max = n-1;
619 else
620 min = n+1;
623 return XmlEncoding_Unknown;
626 static HRESULT alloc_input_buffer(xmlreaderinput *input)
628 input_buffer *buffer;
629 HRESULT hr;
631 input->buffer = NULL;
633 buffer = readerinput_alloc(input, sizeof(*buffer));
634 if (!buffer) return E_OUTOFMEMORY;
636 buffer->input = input;
637 buffer->code_page = ~0; /* code page is unknown at this point */
638 hr = init_encoded_buffer(input, &buffer->utf16);
639 if (hr != S_OK) {
640 readerinput_free(input, buffer);
641 return hr;
644 hr = init_encoded_buffer(input, &buffer->encoded);
645 if (hr != S_OK) {
646 free_encoded_buffer(input, &buffer->utf16);
647 readerinput_free(input, buffer);
648 return hr;
651 input->buffer = buffer;
652 return S_OK;
655 static void free_input_buffer(input_buffer *buffer)
657 free_encoded_buffer(buffer->input, &buffer->encoded);
658 free_encoded_buffer(buffer->input, &buffer->utf16);
659 readerinput_free(buffer->input, buffer);
662 static void readerinput_release_stream(xmlreaderinput *readerinput)
664 if (readerinput->stream) {
665 ISequentialStream_Release(readerinput->stream);
666 readerinput->stream = NULL;
670 /* Queries already stored interface for IStream/ISequentialStream.
671 Interface supplied on creation will be overwritten */
672 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
674 HRESULT hr;
676 readerinput_release_stream(readerinput);
677 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
678 if (hr != S_OK)
679 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
681 return hr;
684 /* reads a chunk to raw buffer */
685 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
687 encoded_buffer *buffer = &readerinput->buffer->encoded;
688 /* to make sure aligned length won't exceed allocated length */
689 ULONG len = buffer->allocated - buffer->written - 4;
690 ULONG read;
691 HRESULT hr;
693 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
694 variable width encodings like UTF-8 */
695 len = (len + 3) & ~3;
696 /* try to use allocated space or grow */
697 if (buffer->allocated - buffer->written < len)
699 buffer->allocated *= 2;
700 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
701 len = buffer->allocated - buffer->written;
704 read = 0;
705 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
706 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
707 readerinput->pending = hr == E_PENDING;
708 if (FAILED(hr)) return hr;
709 buffer->written += read;
711 return hr;
714 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
715 static void readerinput_grow(xmlreaderinput *readerinput, int length)
717 encoded_buffer *buffer = &readerinput->buffer->utf16;
719 length *= sizeof(WCHAR);
720 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
721 if (buffer->allocated < buffer->written + length + 4)
723 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
724 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
725 buffer->allocated = grown_size;
729 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
731 static char startA[] = {'<','?'};
732 static char commentA[] = {'<','!'};
733 encoded_buffer *buffer = &readerinput->buffer->encoded;
734 unsigned char *ptr = (unsigned char*)buffer->data;
736 return !memcmp(buffer->data, startA, sizeof(startA)) ||
737 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
738 /* test start byte */
739 (ptr[0] == '<' &&
741 (ptr[1] && (ptr[1] <= 0x7f)) ||
742 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
743 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
744 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
748 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
750 encoded_buffer *buffer = &readerinput->buffer->encoded;
751 static WCHAR startW[] = {'<','?'};
752 static WCHAR commentW[] = {'<','!'};
753 static char utf8bom[] = {0xef,0xbb,0xbf};
754 static char utf16lebom[] = {0xff,0xfe};
756 *enc = XmlEncoding_Unknown;
758 if (buffer->written <= 3)
760 HRESULT hr = readerinput_growraw(readerinput);
761 if (FAILED(hr)) return hr;
762 if (buffer->written <= 3) return MX_E_INPUTEND;
765 /* try start symbols if we have enough data to do that, input buffer should contain
766 first chunk already */
767 if (readerinput_is_utf8(readerinput))
768 *enc = XmlEncoding_UTF8;
769 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
770 !memcmp(buffer->data, commentW, sizeof(commentW)))
771 *enc = XmlEncoding_UTF16;
772 /* try with BOM now */
773 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
775 buffer->cur += sizeof(utf8bom);
776 *enc = XmlEncoding_UTF8;
778 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
780 buffer->cur += sizeof(utf16lebom);
781 *enc = XmlEncoding_UTF16;
784 return S_OK;
787 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
789 encoded_buffer *buffer = &readerinput->buffer->encoded;
790 int len = buffer->written;
792 /* complete single byte char */
793 if (!(buffer->data[len-1] & 0x80)) return len;
795 /* find start byte of multibyte char */
796 while (--len && !(buffer->data[len] & 0xc0))
799 return len;
802 /* Returns byte length of complete char sequence for buffer code page,
803 it's relative to current buffer position which is currently used for BOM handling
804 only. */
805 static int readerinput_get_convlen(xmlreaderinput *readerinput)
807 encoded_buffer *buffer = &readerinput->buffer->encoded;
808 int len;
810 if (readerinput->buffer->code_page == CP_UTF8)
811 len = readerinput_get_utf8_convlen(readerinput);
812 else
813 len = buffer->written;
815 TRACE("%d\n", len - buffer->cur);
816 return len - buffer->cur;
819 /* It's possible that raw buffer has some leftovers from last conversion - some char
820 sequence that doesn't represent a full code point. Length argument should be calculated with
821 readerinput_get_convlen(), if it's -1 it will be calculated here. */
822 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
824 encoded_buffer *buffer = &readerinput->buffer->encoded;
826 if (len == -1)
827 len = readerinput_get_convlen(readerinput);
829 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
830 /* everything below cur is lost too */
831 buffer->written -= len + buffer->cur;
832 /* after this point we don't need cur offset really,
833 it's used only to mark where actual data begins when first chunk is read */
834 buffer->cur = 0;
837 /* note that raw buffer content is kept */
838 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
840 encoded_buffer *src = &readerinput->buffer->encoded;
841 encoded_buffer *dest = &readerinput->buffer->utf16;
842 int len, dest_len;
843 HRESULT hr;
844 WCHAR *ptr;
845 UINT cp;
847 hr = get_code_page(enc, &cp);
848 if (FAILED(hr)) return;
850 readerinput->buffer->code_page = cp;
851 len = readerinput_get_convlen(readerinput);
853 TRACE("switching to cp %d\n", cp);
855 /* just copy in this case */
856 if (enc == XmlEncoding_UTF16)
858 readerinput_grow(readerinput, len);
859 memcpy(dest->data, src->data + src->cur, len);
860 dest->written += len*sizeof(WCHAR);
861 return;
864 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
865 readerinput_grow(readerinput, dest_len);
866 ptr = (WCHAR*)dest->data;
867 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
868 ptr[dest_len] = 0;
869 dest->written += dest_len*sizeof(WCHAR);
872 /* shrinks parsed data a buffer begins with */
873 static void reader_shrink(xmlreader *reader)
875 encoded_buffer *buffer = &reader->input->buffer->utf16;
877 /* avoid to move too often using threshold shrink length */
878 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
880 buffer->written -= buffer->cur*sizeof(WCHAR);
881 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
882 buffer->cur = 0;
883 *(WCHAR*)&buffer->data[buffer->written] = 0;
887 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
888 It won't attempt to shrink but will grow destination buffer if needed */
889 static HRESULT reader_more(xmlreader *reader)
891 xmlreaderinput *readerinput = reader->input;
892 encoded_buffer *src = &readerinput->buffer->encoded;
893 encoded_buffer *dest = &readerinput->buffer->utf16;
894 UINT cp = readerinput->buffer->code_page;
895 int len, dest_len;
896 HRESULT hr;
897 WCHAR *ptr;
899 /* get some raw data from stream first */
900 hr = readerinput_growraw(readerinput);
901 len = readerinput_get_convlen(readerinput);
903 /* just copy for UTF-16 case */
904 if (cp == ~0)
906 readerinput_grow(readerinput, len);
907 memcpy(dest->data + dest->written, src->data + src->cur, len);
908 dest->written += len*sizeof(WCHAR);
909 return hr;
912 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
913 readerinput_grow(readerinput, dest_len);
914 ptr = (WCHAR*)(dest->data + dest->written);
915 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
916 ptr[dest_len] = 0;
917 dest->written += dest_len*sizeof(WCHAR);
918 /* get rid of processed data */
919 readerinput_shrinkraw(readerinput, len);
921 return hr;
924 static inline UINT reader_get_cur(xmlreader *reader)
926 return reader->input->buffer->utf16.cur;
929 static inline WCHAR *reader_get_ptr(xmlreader *reader)
931 encoded_buffer *buffer = &reader->input->buffer->utf16;
932 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
933 if (!*ptr) reader_more(reader);
934 return (WCHAR*)buffer->data + buffer->cur;
937 static int reader_cmp(xmlreader *reader, const WCHAR *str)
939 const WCHAR *ptr = reader_get_ptr(reader);
940 return strncmpW(str, ptr, strlenW(str));
943 /* moves cursor n WCHARs forward */
944 static void reader_skipn(xmlreader *reader, int n)
946 encoded_buffer *buffer = &reader->input->buffer->utf16;
947 const WCHAR *ptr = reader_get_ptr(reader);
949 while (*ptr++ && n--)
951 buffer->cur++;
952 reader->pos++;
956 static inline BOOL is_wchar_space(WCHAR ch)
958 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
961 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
962 static int reader_skipspaces(xmlreader *reader)
964 encoded_buffer *buffer = &reader->input->buffer->utf16;
965 const WCHAR *ptr = reader_get_ptr(reader), *start = ptr;
967 while (is_wchar_space(*ptr))
969 buffer->cur++;
970 if (*ptr == '\r')
971 reader->pos = 0;
972 else if (*ptr == '\n')
974 reader->line++;
975 reader->pos = 0;
977 else
978 reader->pos++;
979 ptr++;
982 return ptr - start;
985 /* [26] VersionNum ::= '1.' [0-9]+ */
986 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
988 static const WCHAR onedotW[] = {'1','.',0};
989 WCHAR *ptr, *ptr2;
990 UINT start;
992 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
994 start = reader_get_cur(reader);
995 /* skip "1." */
996 reader_skipn(reader, 2);
998 ptr2 = ptr = reader_get_ptr(reader);
999 while (*ptr >= '0' && *ptr <= '9')
1001 reader_skipn(reader, 1);
1002 ptr = reader_get_ptr(reader);
1005 if (ptr2 == ptr) return WC_E_DIGIT;
1006 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1007 TRACE("version=%s\n", debug_strval(reader, val));
1008 return S_OK;
1011 /* [25] Eq ::= S? '=' S? */
1012 static HRESULT reader_parse_eq(xmlreader *reader)
1014 static const WCHAR eqW[] = {'=',0};
1015 reader_skipspaces(reader);
1016 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1017 /* skip '=' */
1018 reader_skipn(reader, 1);
1019 reader_skipspaces(reader);
1020 return S_OK;
1023 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1024 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1026 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1027 strval val, name;
1028 HRESULT hr;
1030 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1032 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1033 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1034 /* skip 'version' */
1035 reader_skipn(reader, 7);
1037 hr = reader_parse_eq(reader);
1038 if (FAILED(hr)) return hr;
1040 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1041 return WC_E_QUOTE;
1042 /* skip "'"|'"' */
1043 reader_skipn(reader, 1);
1045 hr = reader_parse_versionnum(reader, &val);
1046 if (FAILED(hr)) return hr;
1048 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1049 return WC_E_QUOTE;
1051 /* skip "'"|'"' */
1052 reader_skipn(reader, 1);
1054 return reader_add_attr(reader, &name, &val);
1057 /* ([A-Za-z0-9._] | '-') */
1058 static inline BOOL is_wchar_encname(WCHAR ch)
1060 return ((ch >= 'A' && ch <= 'Z') ||
1061 (ch >= 'a' && ch <= 'z') ||
1062 (ch >= '0' && ch <= '9') ||
1063 (ch == '.') || (ch == '_') ||
1064 (ch == '-'));
1067 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1068 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1070 WCHAR *start = reader_get_ptr(reader), *ptr;
1071 xml_encoding enc;
1072 int len;
1074 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1075 return WC_E_ENCNAME;
1077 ptr = start;
1078 while (is_wchar_encname(*++ptr))
1081 len = ptr - start;
1082 enc = parse_encoding_name(start, len);
1083 TRACE("encoding name %s\n", debugstr_wn(start, len));
1084 val->str = start;
1085 val->len = len;
1087 if (enc == XmlEncoding_Unknown)
1088 return WC_E_ENCNAME;
1090 /* skip encoding name */
1091 reader_skipn(reader, len);
1092 return S_OK;
1095 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1096 static HRESULT reader_parse_encdecl(xmlreader *reader)
1098 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1099 strval name, val;
1100 HRESULT hr;
1102 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1104 if (reader_cmp(reader, encodingW)) return S_FALSE;
1105 name.str = reader_get_ptr(reader);
1106 name.len = 8;
1107 /* skip 'encoding' */
1108 reader_skipn(reader, 8);
1110 hr = reader_parse_eq(reader);
1111 if (FAILED(hr)) return hr;
1113 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1114 return WC_E_QUOTE;
1115 /* skip "'"|'"' */
1116 reader_skipn(reader, 1);
1118 hr = reader_parse_encname(reader, &val);
1119 if (FAILED(hr)) return hr;
1121 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1122 return WC_E_QUOTE;
1124 /* skip "'"|'"' */
1125 reader_skipn(reader, 1);
1127 return reader_add_attr(reader, &name, &val);
1130 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1131 static HRESULT reader_parse_sddecl(xmlreader *reader)
1133 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1134 static const WCHAR yesW[] = {'y','e','s',0};
1135 static const WCHAR noW[] = {'n','o',0};
1136 strval name, val;
1137 UINT start;
1138 HRESULT hr;
1140 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1142 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1143 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1144 /* skip 'standalone' */
1145 reader_skipn(reader, 10);
1147 hr = reader_parse_eq(reader);
1148 if (FAILED(hr)) return hr;
1150 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1151 return WC_E_QUOTE;
1152 /* skip "'"|'"' */
1153 reader_skipn(reader, 1);
1155 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1156 return WC_E_XMLDECL;
1158 start = reader_get_cur(reader);
1159 /* skip 'yes'|'no' */
1160 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1161 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1162 TRACE("standalone=%s\n", debug_strval(reader, &val));
1164 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1165 return WC_E_QUOTE;
1166 /* skip "'"|'"' */
1167 reader_skipn(reader, 1);
1169 return reader_add_attr(reader, &name, &val);
1172 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1173 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1175 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1176 static const WCHAR declcloseW[] = {'?','>',0};
1177 HRESULT hr;
1179 /* check if we have "<?xml " */
1180 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1182 reader_skipn(reader, 5);
1183 hr = reader_parse_versioninfo(reader);
1184 if (FAILED(hr))
1185 return hr;
1187 hr = reader_parse_encdecl(reader);
1188 if (FAILED(hr))
1189 return hr;
1191 hr = reader_parse_sddecl(reader);
1192 if (FAILED(hr))
1193 return hr;
1195 reader_skipspaces(reader);
1196 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1197 reader_skipn(reader, 2);
1199 reader_inc_depth(reader);
1200 reader->nodetype = XmlNodeType_XmlDeclaration;
1201 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1202 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1203 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1205 return S_OK;
1208 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1209 static HRESULT reader_parse_comment(xmlreader *reader)
1211 WCHAR *ptr;
1212 UINT start;
1214 if (reader->resumestate == XmlReadResumeState_Comment)
1216 start = reader->resume[XmlReadResume_Body];
1217 ptr = reader_get_ptr(reader);
1219 else
1221 /* skip '<!--' */
1222 reader_skipn(reader, 4);
1223 reader_shrink(reader);
1224 ptr = reader_get_ptr(reader);
1225 start = reader_get_cur(reader);
1226 reader->nodetype = XmlNodeType_Comment;
1227 reader->resume[XmlReadResume_Body] = start;
1228 reader->resumestate = XmlReadResumeState_Comment;
1229 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1230 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1231 reader_set_strvalue(reader, StringValue_Value, NULL);
1234 /* will exit when there's no more data, it won't attempt to
1235 read more from stream */
1236 while (*ptr)
1238 if (ptr[0] == '-')
1240 if (ptr[1] == '-')
1242 if (ptr[2] == '>')
1244 strval value;
1246 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1247 TRACE("%s\n", debug_strval(reader, &value));
1249 /* skip rest of markup '->' */
1250 reader_skipn(reader, 3);
1252 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1253 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1254 reader_set_strvalue(reader, StringValue_Value, &value);
1255 reader->resume[XmlReadResume_Body] = 0;
1256 reader->resumestate = XmlReadResumeState_Initial;
1257 return S_OK;
1259 else
1260 return WC_E_COMMENT;
1264 reader_skipn(reader, 1);
1265 ptr++;
1268 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   Works on a single UTF-16 code unit; surrogate halves (0xd800-0xdfff) are
   accepted so that characters above 0xffff pass unit by unit. Together with the
   two ranges from the production that makes one contiguous 0x20-0xfffd span. */
static inline BOOL is_char(WCHAR ch)
{
    if (ch < 0x20)
        return ch == '\t' || ch == '\n' || ch == '\r';

    return ch <= 0xfffd;
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Returns non-zero when ch is allowed in a public identifier literal. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* 0x27-0x3b: the range has to start at the apostrophe, starting at '-'
              would leave out '()*+, which the production allows */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./0-9:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* Tests a single UTF-16 code unit against the NameStartChar production.
   Surrogate halves are accepted so that supplementary characters pass
   unit by unit. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    /* ASCII part: ':' | '_' | [A-Z] | [a-z] */
    if (ch < 0x80)
        return ch == ':' || ch == '_' ||
               (ch >= 'A' && ch <= 'Z') ||
               (ch >= 'a' && ch <= 'z');

    return (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdfff) || /* high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tests a single UTF-16 code unit: every NameChar except ':'. Surrogate halves
   are accepted so that supplementary characters pass unit by unit. */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    /* ASCII part: '_' | '-' | '.' | [A-Z] | [a-z] | [0-9] */
    if (ch < 0x80)
        return ch == '_' || ch == '-' || ch == '.' ||
               (ch >= 'A' && ch <= 'Z') ||
               (ch >= 'a' && ch <= 'z') ||
               (ch >= '0' && ch <= '9');

    return (ch == 0xb7) ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x300  && ch <= 0x36f)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdfff) || /* high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1339 static inline BOOL is_namechar(WCHAR ch)
1341 return (ch == ':') || is_ncnamechar(ch);
1344 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1346 /* When we're on attribute always return attribute type, container node type is kept.
1347 Note that container is not necessarily an element, and attribute doesn't mean it's
1348 an attribute in XML spec terms. */
1349 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1352 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1353 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1354 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1355 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1356 [5] Name ::= NameStartChar (NameChar)* */
1357 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1359 WCHAR *ptr;
1360 UINT start;
1362 if (reader->resume[XmlReadResume_Name])
1364 start = reader->resume[XmlReadResume_Name];
1365 ptr = reader_get_ptr(reader);
1367 else
1369 ptr = reader_get_ptr(reader);
1370 start = reader_get_cur(reader);
1371 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1374 while (is_namechar(*ptr))
1376 reader_skipn(reader, 1);
1377 ptr = reader_get_ptr(reader);
1380 if (is_reader_pending(reader))
1382 reader->resume[XmlReadResume_Name] = start;
1383 return E_PENDING;
1385 else
1386 reader->resume[XmlReadResume_Name] = 0;
1388 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1389 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1391 return S_OK;
1394 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1395 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1397 static const WCHAR xmlW[] = {'x','m','l'};
1398 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1399 strval name;
1400 WCHAR *ptr;
1401 HRESULT hr;
1402 UINT i;
1404 hr = reader_parse_name(reader, &name);
1405 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1407 /* now that we got name check for illegal content */
1408 if (strval_eq(reader, &name, &xmlval))
1409 return WC_E_LEADINGXML;
1411 /* PITarget can't be a qualified name */
1412 ptr = reader_get_strptr(reader, &name);
1413 for (i = 0; i < name.len; i++)
1414 if (ptr[i] == ':')
1415 return i ? NC_E_NAMECOLON : WC_E_PI;
1417 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1418 *target = name;
1419 return S_OK;
1422 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1423 static HRESULT reader_parse_pi(xmlreader *reader)
1425 strval target;
1426 WCHAR *ptr;
1427 UINT start;
1428 HRESULT hr;
1430 switch (reader->resumestate)
1432 case XmlReadResumeState_Initial:
1433 /* skip '<?' */
1434 reader_skipn(reader, 2);
1435 reader_shrink(reader);
1436 reader->resumestate = XmlReadResumeState_PITarget;
1437 case XmlReadResumeState_PITarget:
1438 hr = reader_parse_pitarget(reader, &target);
1439 if (FAILED(hr)) return hr;
1440 reader_set_strvalue(reader, StringValue_LocalName, &target);
1441 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1442 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1443 reader->resumestate = XmlReadResumeState_PIBody;
1444 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1445 default:
1449 start = reader->resume[XmlReadResume_Body];
1450 ptr = reader_get_ptr(reader);
1451 while (*ptr)
1453 if (ptr[0] == '?')
1455 if (ptr[1] == '>')
1457 UINT cur = reader_get_cur(reader);
1458 strval value;
1460 /* strip all leading whitespace chars */
1461 while (start < cur)
1463 ptr = reader_get_ptr2(reader, start);
1464 if (!is_wchar_space(*ptr)) break;
1465 start++;
1468 reader_init_strvalue(start, cur-start, &value);
1470 /* skip '?>' */
1471 reader_skipn(reader, 2);
1472 TRACE("%s\n", debug_strval(reader, &value));
1473 reader->nodetype = XmlNodeType_ProcessingInstruction;
1474 reader->resumestate = XmlReadResumeState_Initial;
1475 reader->resume[XmlReadResume_Body] = 0;
1476 reader_set_strvalue(reader, StringValue_Value, &value);
1477 return S_OK;
1481 reader_skipn(reader, 1);
1482 ptr = reader_get_ptr(reader);
1485 return S_OK;
1488 /* This one is used to parse significant whitespace nodes, like in Misc production */
1489 static HRESULT reader_parse_whitespace(xmlreader *reader)
1491 WCHAR *start, *ptr;
1493 reader_shrink(reader);
1494 start = reader_get_ptr(reader);
1496 reader_skipspaces(reader);
1497 ptr = reader_get_ptr(reader);
1498 TRACE("%s\n", debugstr_wn(start, ptr-start));
1500 reader->nodetype = XmlNodeType_Whitespace;
1501 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1502 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1503 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1504 return S_OK;
1507 /* [27] Misc ::= Comment | PI | S */
1508 static HRESULT reader_parse_misc(xmlreader *reader)
1510 HRESULT hr = S_FALSE;
1512 if (reader->resumestate != XmlReadResumeState_Initial)
1514 hr = reader_more(reader);
1515 if (FAILED(hr)) return hr;
1517 /* finish current node */
1518 switch (reader->resumestate)
1520 case XmlReadResumeState_PITarget:
1521 case XmlReadResumeState_PIBody:
1522 return reader_parse_pi(reader);
1523 case XmlReadResumeState_Comment:
1524 return reader_parse_comment(reader);
1525 default:
1526 ERR("unknown resume state %d\n", reader->resumestate);
1530 while (1)
1532 const WCHAR *cur = reader_get_ptr(reader);
1534 if (is_wchar_space(*cur))
1535 hr = reader_parse_whitespace(reader);
1536 else if (!reader_cmp(reader, commentW))
1537 hr = reader_parse_comment(reader);
1538 else if (!reader_cmp(reader, piW))
1539 hr = reader_parse_pi(reader);
1540 else
1541 break;
1543 if (hr != S_FALSE) return hr;
1546 return hr;
1549 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1550 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1552 WCHAR *cur = reader_get_ptr(reader), quote;
1553 UINT start;
1555 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1557 quote = *cur;
1558 reader_skipn(reader, 1);
1560 cur = reader_get_ptr(reader);
1561 start = reader_get_cur(reader);
1562 while (is_char(*cur) && *cur != quote)
1564 reader_skipn(reader, 1);
1565 cur = reader_get_ptr(reader);
1567 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1568 if (*cur == quote) reader_skipn(reader, 1);
1570 TRACE("%s\n", debug_strval(reader, literal));
1571 return S_OK;
1574 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1575 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1576 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1578 WCHAR *cur = reader_get_ptr(reader), quote;
1579 UINT start;
1581 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1583 quote = *cur;
1584 reader_skipn(reader, 1);
1586 start = reader_get_cur(reader);
1587 cur = reader_get_ptr(reader);
1588 while (is_pubchar(*cur) && *cur != quote)
1590 reader_skipn(reader, 1);
1591 cur = reader_get_ptr(reader);
1594 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1595 TRACE("%s\n", debug_strval(reader, literal));
1596 return S_OK;
1599 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1600 static HRESULT reader_parse_externalid(xmlreader *reader)
1602 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1603 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1604 strval name;
1605 HRESULT hr;
1606 int cnt;
1608 if (reader_cmp(reader, systemW))
1610 if (reader_cmp(reader, publicW))
1611 return S_FALSE;
1612 else
1614 strval pub;
1616 /* public id */
1617 reader_skipn(reader, 6);
1618 cnt = reader_skipspaces(reader);
1619 if (!cnt) return WC_E_WHITESPACE;
1621 hr = reader_parse_pub_literal(reader, &pub);
1622 if (FAILED(hr)) return hr;
1624 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1625 return reader_add_attr(reader, &name, &pub);
1628 else
1630 strval sys;
1632 /* system id */
1633 reader_skipn(reader, 6);
1634 cnt = reader_skipspaces(reader);
1635 if (!cnt) return WC_E_WHITESPACE;
1637 hr = reader_parse_sys_literal(reader, &sys);
1638 if (FAILED(hr)) return hr;
1640 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1641 return reader_add_attr(reader, &name, &sys);
1644 return hr;
1647 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1648 static HRESULT reader_parse_dtd(xmlreader *reader)
1650 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1651 strval name;
1652 WCHAR *cur;
1653 HRESULT hr;
1655 /* check if we have "<!DOCTYPE" */
1656 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1657 reader_shrink(reader);
1659 /* DTD processing is not allowed by default */
1660 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1662 reader_skipn(reader, 9);
1663 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1665 /* name */
1666 hr = reader_parse_name(reader, &name);
1667 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1669 reader_skipspaces(reader);
1671 hr = reader_parse_externalid(reader);
1672 if (FAILED(hr)) return hr;
1674 reader_skipspaces(reader);
1676 cur = reader_get_ptr(reader);
1677 if (*cur != '>')
1679 FIXME("internal subset parsing not implemented\n");
1680 return E_NOTIMPL;
1683 /* skip '>' */
1684 reader_skipn(reader, 1);
1686 reader->nodetype = XmlNodeType_DocumentType;
1687 reader_set_strvalue(reader, StringValue_LocalName, &name);
1688 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1690 return S_OK;
1693 /* [11 NS] LocalPart ::= NCName */
1694 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1696 WCHAR *ptr;
1697 UINT start;
1699 if (reader->resume[XmlReadResume_Local])
1701 start = reader->resume[XmlReadResume_Local];
1702 ptr = reader_get_ptr(reader);
1704 else
1706 ptr = reader_get_ptr(reader);
1707 start = reader_get_cur(reader);
1710 while (is_ncnamechar(*ptr))
1712 reader_skipn(reader, 1);
1713 ptr = reader_get_ptr(reader);
1716 if (is_reader_pending(reader))
1718 reader->resume[XmlReadResume_Local] = start;
1719 return E_PENDING;
1721 else
1722 reader->resume[XmlReadResume_Local] = 0;
1724 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1726 return S_OK;
1729 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1730 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1731 [9 NS] UnprefixedName ::= LocalPart
1732 [10 NS] Prefix ::= NCName */
1733 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1735 WCHAR *ptr;
1736 UINT start;
1737 HRESULT hr;
1739 if (reader->resume[XmlReadResume_Name])
1741 start = reader->resume[XmlReadResume_Name];
1742 ptr = reader_get_ptr(reader);
1744 else
1746 ptr = reader_get_ptr(reader);
1747 start = reader_get_cur(reader);
1748 reader->resume[XmlReadResume_Name] = start;
1749 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1752 if (reader->resume[XmlReadResume_Local])
1754 hr = reader_parse_local(reader, local);
1755 if (FAILED(hr)) return hr;
1757 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1758 local->start - reader->resume[XmlReadResume_Name] - 1,
1759 prefix);
1761 else
1763 /* skip prefix part */
1764 while (is_ncnamechar(*ptr))
1766 reader_skipn(reader, 1);
1767 ptr = reader_get_ptr(reader);
1770 if (is_reader_pending(reader)) return E_PENDING;
1772 /* got a qualified name */
1773 if (*ptr == ':')
1775 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1777 /* skip ':' */
1778 reader_skipn(reader, 1);
1779 hr = reader_parse_local(reader, local);
1780 if (FAILED(hr)) return hr;
1782 else
1784 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1785 reader_init_strvalue(0, 0, prefix);
1789 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1791 if (prefix->len)
1792 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1793 else
1794 TRACE("ncname %s\n", debug_strval(reader, local));
1796 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1797 /* count ':' too */
1798 (prefix->len ? prefix->len + 1 : 0) + local->len,
1799 qname);
1801 reader->resume[XmlReadResume_Name] = 0;
1802 reader->resume[XmlReadResume_Local] = 0;
1804 return S_OK;
1807 /* Applies normalization rules to a single char, used for attribute values.
1809 Rules include 2 steps:
1811 1) replacing \r\n with a single \n;
1812 2) replacing all whitespace chars with ' '.
1815 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1817 encoded_buffer *buffer = &reader->input->buffer->utf16;
1819 if (!is_wchar_space(*ptr)) return;
1821 if (*ptr == '\r' && *(ptr+1) == '\n')
1823 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1824 memmove(ptr+1, ptr+2, len);
1826 *ptr = ' ';
1829 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1831 static const WCHAR entltW[] = {'l','t'};
1832 static const WCHAR entgtW[] = {'g','t'};
1833 static const WCHAR entampW[] = {'a','m','p'};
1834 static const WCHAR entaposW[] = {'a','p','o','s'};
1835 static const WCHAR entquotW[] = {'q','u','o','t'};
1836 static const strval lt = { (WCHAR*)entltW, 2 };
1837 static const strval gt = { (WCHAR*)entgtW, 2 };
1838 static const strval amp = { (WCHAR*)entampW, 3 };
1839 static const strval apos = { (WCHAR*)entaposW, 4 };
1840 static const strval quot = { (WCHAR*)entquotW, 4 };
1841 WCHAR *str = reader_get_strptr(reader, name);
1843 switch (*str)
1845 case 'l':
1846 if (strval_eq(reader, name, &lt)) return '<';
1847 break;
1848 case 'g':
1849 if (strval_eq(reader, name, &gt)) return '>';
1850 break;
1851 case 'a':
1852 if (strval_eq(reader, name, &amp))
1853 return '&';
1854 else if (strval_eq(reader, name, &apos))
1855 return '\'';
1856 break;
1857 case 'q':
1858 if (strval_eq(reader, name, &quot)) return '\"';
1859 break;
1860 default:
1864 return 0;
1867 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1868 [67] Reference ::= EntityRef | CharRef
1869 [68] EntityRef ::= '&' Name ';' */
1870 static HRESULT reader_parse_reference(xmlreader *reader)
1872 encoded_buffer *buffer = &reader->input->buffer->utf16;
1873 WCHAR *start = reader_get_ptr(reader), *ptr;
1874 UINT cur = reader_get_cur(reader);
1875 WCHAR ch = 0;
1876 int len;
1878 /* skip '&' */
1879 reader_skipn(reader, 1);
1880 ptr = reader_get_ptr(reader);
1882 if (*ptr == '#')
1884 reader_skipn(reader, 1);
1885 ptr = reader_get_ptr(reader);
1887 /* hex char or decimal */
1888 if (*ptr == 'x')
1890 reader_skipn(reader, 1);
1891 ptr = reader_get_ptr(reader);
1893 while (*ptr != ';')
1895 if ((*ptr >= '0' && *ptr <= '9'))
1896 ch = ch*16 + *ptr - '0';
1897 else if ((*ptr >= 'a' && *ptr <= 'f'))
1898 ch = ch*16 + *ptr - 'a' + 10;
1899 else if ((*ptr >= 'A' && *ptr <= 'F'))
1900 ch = ch*16 + *ptr - 'A' + 10;
1901 else
1902 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1903 reader_skipn(reader, 1);
1904 ptr = reader_get_ptr(reader);
1907 else
1909 while (*ptr != ';')
1911 if ((*ptr >= '0' && *ptr <= '9'))
1913 ch = ch*10 + *ptr - '0';
1914 reader_skipn(reader, 1);
1915 ptr = reader_get_ptr(reader);
1917 else
1918 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1922 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1924 /* normalize */
1925 if (is_wchar_space(ch)) ch = ' ';
1927 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1928 memmove(start+1, ptr+1, len);
1929 buffer->cur = cur + 1;
1931 *start = ch;
1933 else
1935 strval name;
1936 HRESULT hr;
1938 hr = reader_parse_name(reader, &name);
1939 if (FAILED(hr)) return hr;
1941 ptr = reader_get_ptr(reader);
1942 if (*ptr != ';') return WC_E_SEMICOLON;
1944 /* predefined entities resolve to a single character */
1945 ch = get_predefined_entity(reader, &name);
1946 if (ch)
1948 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1949 memmove(start+1, ptr+1, len);
1950 buffer->cur = cur + 1;
1952 *start = ch;
1954 else
1956 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1957 return WC_E_UNDECLAREDENTITY;
1962 return S_OK;
1965 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1966 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1968 WCHAR *ptr, quote;
1969 UINT start;
1971 ptr = reader_get_ptr(reader);
1973 /* skip opening quote */
1974 quote = *ptr;
1975 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1976 reader_skipn(reader, 1);
1978 ptr = reader_get_ptr(reader);
1979 start = reader_get_cur(reader);
1980 while (*ptr)
1982 if (*ptr == '<') return WC_E_LESSTHAN;
1984 if (*ptr == quote)
1986 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
1987 /* skip closing quote */
1988 reader_skipn(reader, 1);
1989 return S_OK;
1992 if (*ptr == '&')
1994 HRESULT hr = reader_parse_reference(reader);
1995 if (FAILED(hr)) return hr;
1997 else
1999 reader_normalize_space(reader, ptr);
2000 reader_skipn(reader, 1);
2002 ptr = reader_get_ptr(reader);
2005 return WC_E_QUOTE;
2008 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2009 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2010 [3 NS] DefaultAttName ::= 'xmlns'
2011 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2012 static HRESULT reader_parse_attribute(xmlreader *reader)
2014 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
2015 strval prefix, local, qname, xmlns, value;
2016 HRESULT hr;
2018 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2019 if (FAILED(hr)) return hr;
2021 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
2023 if (strval_eq(reader, &prefix, &xmlns))
2025 FIXME("namespace definitions not supported\n");
2026 return E_NOTIMPL;
2029 if (strval_eq(reader, &qname, &xmlns))
2031 FIXME("default namespace definitions not supported\n");
2032 return E_NOTIMPL;
2035 hr = reader_parse_eq(reader);
2036 if (FAILED(hr)) return hr;
2038 hr = reader_parse_attvalue(reader, &value);
2039 if (FAILED(hr)) return hr;
2041 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2042 return reader_add_attr(reader, &local, &value);
2045 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2046 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2047 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2049 HRESULT hr;
2051 hr = reader_parse_qname(reader, prefix, local, qname);
2052 if (FAILED(hr)) return hr;
2054 while (1)
2056 static const WCHAR endW[] = {'/','>',0};
2058 reader_skipspaces(reader);
2060 /* empty element */
2061 if ((*empty = !reader_cmp(reader, endW)))
2063 /* skip '/>' */
2064 reader_skipn(reader, 2);
2065 reader->empty_element = TRUE;
2066 return S_OK;
2069 /* got a start tag */
2070 if (!reader_cmp(reader, gtW))
2072 /* skip '>' */
2073 reader_skipn(reader, 1);
2074 return reader_push_element(reader, qname, local);
2077 hr = reader_parse_attribute(reader);
2078 if (FAILED(hr)) return hr;
2081 return S_OK;
2084 /* [39] element ::= EmptyElemTag | STag content ETag */
2085 static HRESULT reader_parse_element(xmlreader *reader)
2087 HRESULT hr;
2089 switch (reader->resumestate)
2091 case XmlReadResumeState_Initial:
2092 /* check if we are really on element */
2093 if (reader_cmp(reader, ltW)) return S_FALSE;
2095 /* skip '<' */
2096 reader_skipn(reader, 1);
2098 reader_shrink(reader);
2099 reader->resumestate = XmlReadResumeState_STag;
2100 case XmlReadResumeState_STag:
2102 strval qname, prefix, local;
2103 int empty = 0;
2105 /* this handles empty elements too */
2106 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2107 if (FAILED(hr)) return hr;
2109 /* FIXME: need to check for defined namespace to reject invalid prefix,
2110 currently reject all prefixes */
2111 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2113 /* if we got empty element and stack is empty go straight to Misc */
2114 if (empty && list_empty(&reader->elements))
2115 reader->instate = XmlReadInState_MiscEnd;
2116 else
2117 reader->instate = XmlReadInState_Content;
2119 reader->nodetype = XmlNodeType_Element;
2120 reader->resumestate = XmlReadResumeState_Initial;
2121 reader_set_strvalue(reader, StringValue_LocalName, &local);
2122 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2123 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2124 break;
2126 default:
2127 hr = E_FAIL;
2130 return hr;
2133 /* [13 NS] ETag ::= '</' QName S? '>' */
2134 static HRESULT reader_parse_endtag(xmlreader *reader)
2136 strval prefix, local, qname;
2137 struct element *elem;
2138 HRESULT hr;
2140 /* skip '</' */
2141 reader_skipn(reader, 2);
2143 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2144 if (FAILED(hr)) return hr;
2146 reader_skipspaces(reader);
2148 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2150 /* skip '>' */
2151 reader_skipn(reader, 1);
2153 /* Element stack should never be empty at this point, cause we shouldn't get to
2154 content parsing if it's empty. */
2155 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2156 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2158 reader_pop_element(reader);
2160 /* It was a root element, the rest is expected as Misc */
2161 if (list_empty(&reader->elements))
2162 reader->instate = XmlReadInState_MiscEnd;
2164 reader->nodetype = XmlNodeType_EndElement;
2165 reader_set_strvalue(reader, StringValue_LocalName, &local);
2166 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2168 return S_OK;
2171 /* [18] CDSect ::= CDStart CData CDEnd
2172 [19] CDStart ::= '<![CDATA['
2173 [20] CData ::= (Char* - (Char* ']]>' Char*))
2174 [21] CDEnd ::= ']]>' */
2175 static HRESULT reader_parse_cdata(xmlreader *reader)
2177 WCHAR *ptr;
2178 UINT start;
2180 if (reader->resumestate == XmlReadResumeState_CDATA)
2182 start = reader->resume[XmlReadResume_Body];
2183 ptr = reader_get_ptr(reader);
2185 else
2187 /* skip markup '<![CDATA[' */
2188 reader_skipn(reader, 9);
2189 reader_shrink(reader);
2190 ptr = reader_get_ptr(reader);
2191 start = reader_get_cur(reader);
2192 reader->nodetype = XmlNodeType_CDATA;
2193 reader->resume[XmlReadResume_Body] = start;
2194 reader->resumestate = XmlReadResumeState_CDATA;
2195 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2196 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2197 reader_set_strvalue(reader, StringValue_Value, NULL);
2200 while (*ptr)
2202 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2204 strval value;
2206 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2208 /* skip ']]>' */
2209 reader_skipn(reader, 3);
2210 TRACE("%s\n", debug_strval(reader, &value));
2212 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2213 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2214 reader_set_strvalue(reader, StringValue_Value, &value);
2215 reader->resume[XmlReadResume_Body] = 0;
2216 reader->resumestate = XmlReadResumeState_Initial;
2217 return S_OK;
2219 else
2221 /* Value normalization is not fully implemented, rules are:
2223 - single '\r' -> '\n';
2224 - sequence '\r\n' -> '\n', in this case value length changes;
2226 if (*ptr == '\r') *ptr = '\n';
2227 reader_skipn(reader, 1);
2228 ptr++;
2232 return S_OK;
2235 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2236 static HRESULT reader_parse_chardata(xmlreader *reader)
2238 WCHAR *ptr;
2239 UINT start;
2241 if (reader->resumestate == XmlReadResumeState_CharData)
2243 start = reader->resume[XmlReadResume_Body];
2244 ptr = reader_get_ptr(reader);
2246 else
2248 reader_shrink(reader);
2249 ptr = reader_get_ptr(reader);
2250 start = reader_get_cur(reader);
2251 /* There's no text */
2252 if (!*ptr || *ptr == '<') return S_OK;
2253 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2254 reader->resume[XmlReadResume_Body] = start;
2255 reader->resumestate = XmlReadResumeState_CharData;
2256 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2257 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2258 reader_set_strvalue(reader, StringValue_Value, NULL);
2261 while (*ptr)
2263 /* CDATA closing sequence ']]>' is not allowed */
2264 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2265 return WC_E_CDSECTEND;
2267 /* Found next markup part */
2268 if (ptr[0] == '<')
2270 strval value;
2272 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2273 reader_set_strvalue(reader, StringValue_Value, &value);
2274 reader->resume[XmlReadResume_Body] = 0;
2275 reader->resumestate = XmlReadResumeState_Initial;
2276 return S_OK;
2279 reader_skipn(reader, 1);
2281 /* this covers a case when text has leading whitespace chars */
2282 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2283 ptr++;
2286 return S_OK;
2289 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2290 static HRESULT reader_parse_content(xmlreader *reader)
2292 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2293 static const WCHAR etagW[] = {'<','/',0};
2294 static const WCHAR ampW[] = {'&',0};
2296 if (reader->resumestate != XmlReadResumeState_Initial)
2298 switch (reader->resumestate)
2300 case XmlReadResumeState_CDATA:
2301 return reader_parse_cdata(reader);
2302 case XmlReadResumeState_Comment:
2303 return reader_parse_comment(reader);
2304 case XmlReadResumeState_PIBody:
2305 case XmlReadResumeState_PITarget:
2306 return reader_parse_pi(reader);
2307 case XmlReadResumeState_CharData:
2308 return reader_parse_chardata(reader);
2309 default:
2310 ERR("unknown resume state %d\n", reader->resumestate);
2314 reader_shrink(reader);
2316 /* handle end tag here, it indicates end of content as well */
2317 if (!reader_cmp(reader, etagW))
2318 return reader_parse_endtag(reader);
2320 if (!reader_cmp(reader, commentW))
2321 return reader_parse_comment(reader);
2323 if (!reader_cmp(reader, piW))
2324 return reader_parse_pi(reader);
2326 if (!reader_cmp(reader, cdstartW))
2327 return reader_parse_cdata(reader);
2329 if (!reader_cmp(reader, ampW))
2330 return reader_parse_reference(reader);
2332 if (!reader_cmp(reader, ltW))
2333 return reader_parse_element(reader);
2335 /* what's left must be CharData */
2336 return reader_parse_chardata(reader);
2339 static HRESULT reader_parse_nextnode(xmlreader *reader)
2341 HRESULT hr;
2343 if (!is_reader_pending(reader))
2344 reader_clear_attrs(reader);
2346 while (1)
2348 switch (reader->instate)
2350 /* if it's a first call for a new input we need to detect stream encoding */
2351 case XmlReadInState_Initial:
2353 xml_encoding enc;
2355 hr = readerinput_growraw(reader->input);
2356 if (FAILED(hr)) return hr;
2358 /* try to detect encoding by BOM or data and set input code page */
2359 hr = readerinput_detectencoding(reader->input, &enc);
2360 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2361 if (FAILED(hr)) return hr;
2363 /* always switch first time cause we have to put something in */
2364 readerinput_switchencoding(reader->input, enc);
2366 /* parse xml declaration */
2367 hr = reader_parse_xmldecl(reader);
2368 if (FAILED(hr)) return hr;
2370 readerinput_shrinkraw(reader->input, -1);
2371 reader->instate = XmlReadInState_Misc_DTD;
2372 if (hr == S_OK) return hr;
2374 break;
2375 case XmlReadInState_Misc_DTD:
2376 hr = reader_parse_misc(reader);
2377 if (FAILED(hr)) return hr;
2379 if (hr == S_FALSE)
2380 reader->instate = XmlReadInState_DTD;
2381 else
2382 return hr;
2383 break;
2384 case XmlReadInState_DTD:
2385 hr = reader_parse_dtd(reader);
2386 if (FAILED(hr)) return hr;
2388 if (hr == S_OK)
2390 reader->instate = XmlReadInState_DTD_Misc;
2391 return hr;
2393 else
2394 reader->instate = XmlReadInState_Element;
2395 break;
2396 case XmlReadInState_DTD_Misc:
2397 hr = reader_parse_misc(reader);
2398 if (FAILED(hr)) return hr;
2400 if (hr == S_FALSE)
2401 reader->instate = XmlReadInState_Element;
2402 else
2403 return hr;
2404 break;
2405 case XmlReadInState_Element:
2406 return reader_parse_element(reader);
2407 case XmlReadInState_Content:
2408 return reader_parse_content(reader);
2409 case XmlReadInState_MiscEnd:
2410 hr = reader_parse_misc(reader);
2411 if (FAILED(hr)) return hr;
2413 if (hr == S_FALSE)
2414 reader->instate = XmlReadInState_Eof;
2415 return hr;
2416 case XmlReadInState_Eof:
2417 return S_FALSE;
2418 default:
2419 FIXME("internal state %d not handled\n", reader->instate);
2420 return E_NOTIMPL;
2424 return E_NOTIMPL;
2427 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2429 xmlreader *This = impl_from_IXmlReader(iface);
2431 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2433 if (IsEqualGUID(riid, &IID_IUnknown) ||
2434 IsEqualGUID(riid, &IID_IXmlReader))
2436 *ppvObject = iface;
2438 else
2440 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2441 *ppvObject = NULL;
2442 return E_NOINTERFACE;
2445 IXmlReader_AddRef(iface);
2447 return S_OK;
2450 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2452 xmlreader *This = impl_from_IXmlReader(iface);
2453 ULONG ref = InterlockedIncrement(&This->ref);
2454 TRACE("(%p)->(%d)\n", This, ref);
2455 return ref;
2458 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2460 xmlreader *This = impl_from_IXmlReader(iface);
2461 LONG ref = InterlockedDecrement(&This->ref);
2463 TRACE("(%p)->(%d)\n", This, ref);
2465 if (ref == 0)
2467 IMalloc *imalloc = This->imalloc;
2468 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2469 reader_clear_attrs(This);
2470 reader_clear_elements(This);
2471 reader_free_strvalues(This);
2472 reader_free(This, This);
2473 if (imalloc) IMalloc_Release(imalloc);
2476 return ref;
2479 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2481 xmlreader *This = impl_from_IXmlReader(iface);
2482 IXmlReaderInput *readerinput;
2483 HRESULT hr;
2485 TRACE("(%p)->(%p)\n", This, input);
2487 if (This->input)
2489 readerinput_release_stream(This->input);
2490 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2491 This->input = NULL;
2494 This->line = This->pos = 0;
2495 reader_clear_elements(This);
2496 This->depth = 0;
2497 This->resumestate = XmlReadResumeState_Initial;
2498 memset(This->resume, 0, sizeof(This->resume));
2500 /* just reset current input */
2501 if (!input)
2503 This->state = XmlReadState_Initial;
2504 return S_OK;
2507 /* now try IXmlReaderInput, ISequentialStream, IStream */
2508 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2509 if (hr == S_OK)
2511 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2512 This->input = impl_from_IXmlReaderInput(readerinput);
2513 else
2515 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2516 readerinput, readerinput->lpVtbl);
2517 IUnknown_Release(readerinput);
2518 return E_FAIL;
2523 if (hr != S_OK || !readerinput)
2525 /* create IXmlReaderInput basing on supplied interface */
2526 hr = CreateXmlReaderInputWithEncodingName(input,
2527 NULL, NULL, FALSE, NULL, &readerinput);
2528 if (hr != S_OK) return hr;
2529 This->input = impl_from_IXmlReaderInput(readerinput);
2532 /* set stream for supplied IXmlReaderInput */
2533 hr = readerinput_query_for_stream(This->input);
2534 if (hr == S_OK)
2536 This->state = XmlReadState_Initial;
2537 This->instate = XmlReadInState_Initial;
2540 return hr;
2543 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2545 xmlreader *This = impl_from_IXmlReader(iface);
2547 TRACE("(%p)->(%s %p)\n", This, debugstr_prop(property), value);
2549 if (!value) return E_INVALIDARG;
2551 switch (property)
2553 case XmlReaderProperty_DtdProcessing:
2554 *value = This->dtdmode;
2555 break;
2556 case XmlReaderProperty_ReadState:
2557 *value = This->state;
2558 break;
2559 default:
2560 FIXME("Unimplemented property (%u)\n", property);
2561 return E_NOTIMPL;
2564 return S_OK;
2567 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2569 xmlreader *This = impl_from_IXmlReader(iface);
2571 TRACE("(%p)->(%s %lu)\n", This, debugstr_prop(property), value);
2573 switch (property)
2575 case XmlReaderProperty_DtdProcessing:
2576 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2577 This->dtdmode = value;
2578 break;
2579 default:
2580 FIXME("Unimplemented property (%u)\n", property);
2581 return E_NOTIMPL;
2584 return S_OK;
2587 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2589 xmlreader *This = impl_from_IXmlReader(iface);
2590 XmlNodeType oldtype = This->nodetype;
2591 HRESULT hr;
2593 TRACE("(%p)->(%p)\n", This, nodetype);
2595 if (This->state == XmlReadState_Closed) return S_FALSE;
2597 hr = reader_parse_nextnode(This);
2598 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2599 This->state = XmlReadState_Interactive;
2600 if (hr == S_OK)
2602 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2603 *nodetype = This->nodetype;
2606 return hr;
2609 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2611 xmlreader *This = impl_from_IXmlReader(iface);
2612 TRACE("(%p)->(%p)\n", This, node_type);
2614 *node_type = reader_get_nodetype(This);
2615 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2618 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2620 xmlreader *This = impl_from_IXmlReader(iface);
2622 TRACE("(%p)\n", This);
2624 if (!This->attr_count) return S_FALSE;
2625 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2626 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2627 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2629 return S_OK;
2632 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2634 xmlreader *This = impl_from_IXmlReader(iface);
2635 const struct list *next;
2637 TRACE("(%p)\n", This);
2639 if (!This->attr_count) return S_FALSE;
2641 if (!This->attr)
2642 return IXmlReader_MoveToFirstAttribute(iface);
2644 next = list_next(&This->attrs, &This->attr->entry);
2645 if (next)
2647 This->attr = LIST_ENTRY(next, struct attribute, entry);
2648 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2649 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2652 return next ? S_OK : S_FALSE;
2655 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2656 LPCWSTR local_name,
2657 LPCWSTR namespaceUri)
2659 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2660 return E_NOTIMPL;
2663 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2665 xmlreader *This = impl_from_IXmlReader(iface);
2666 struct element *elem;
2668 TRACE("(%p)\n", This);
2670 if (!This->attr_count) return S_FALSE;
2671 This->attr = NULL;
2673 /* FIXME: support other node types with 'attributes' like DTD */
2674 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2675 if (elem)
2677 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2678 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2681 return S_OK;
2684 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2686 xmlreader *This = impl_from_IXmlReader(iface);
2688 TRACE("(%p)->(%p %p)\n", This, name, len);
2689 *name = This->strvalues[StringValue_QualifiedName].str;
2690 *len = This->strvalues[StringValue_QualifiedName].len;
2691 return S_OK;
2694 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2695 LPCWSTR *namespaceUri,
2696 UINT *namespaceUri_length)
2698 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2699 return E_NOTIMPL;
2702 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2704 xmlreader *This = impl_from_IXmlReader(iface);
2706 TRACE("(%p)->(%p %p)\n", This, name, len);
2707 *name = This->strvalues[StringValue_LocalName].str;
2708 if (len) *len = This->strvalues[StringValue_LocalName].len;
2709 return S_OK;
2712 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2714 xmlreader *This = impl_from_IXmlReader(iface);
2716 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2717 *prefix = This->strvalues[StringValue_Prefix].str;
2718 if (len) *len = This->strvalues[StringValue_Prefix].len;
2719 return S_OK;
2722 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2724 xmlreader *reader = impl_from_IXmlReader(iface);
2725 strval *val = &reader->strvalues[StringValue_Value];
2727 TRACE("(%p)->(%p %p)\n", reader, value, len);
2729 *value = NULL;
2731 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2733 XmlNodeType type;
2734 HRESULT hr;
2736 hr = IXmlReader_Read(iface, &type);
2737 if (FAILED(hr)) return hr;
2739 /* return if still pending, partially read values are not reported */
2740 if (is_reader_pending(reader)) return E_PENDING;
2743 if (!val->str)
2745 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2746 if (!ptr) return E_OUTOFMEMORY;
2747 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2748 ptr[val->len] = 0;
2749 val->str = ptr;
2752 *value = val->str;
2753 if (len) *len = val->len;
2754 return S_OK;
2757 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2759 xmlreader *reader = impl_from_IXmlReader(iface);
2760 strval *val = &reader->strvalues[StringValue_Value];
2761 UINT len;
2763 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2765 /* Value is already allocated, chunked reads are not possible. */
2766 if (val->str) return S_FALSE;
2768 if (val->len)
2770 len = min(chunk_size, val->len);
2771 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2772 val->start += len;
2773 val->len -= len;
2774 if (read) *read = len;
2777 return S_OK;
2780 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2781 LPCWSTR *baseUri,
2782 UINT *baseUri_length)
2784 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2785 return E_NOTIMPL;
2788 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2790 FIXME("(%p): stub\n", iface);
2791 return FALSE;
2794 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2796 xmlreader *This = impl_from_IXmlReader(iface);
2797 TRACE("(%p)\n", This);
2798 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2799 when current node is start tag of an element */
2800 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2803 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2805 xmlreader *This = impl_from_IXmlReader(iface);
2807 TRACE("(%p %p)\n", This, lineNumber);
2809 if (!lineNumber) return E_INVALIDARG;
2811 *lineNumber = This->line;
2813 return S_OK;
2816 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2818 xmlreader *This = impl_from_IXmlReader(iface);
2820 TRACE("(%p %p)\n", This, linePosition);
2822 if (!linePosition) return E_INVALIDARG;
2824 *linePosition = This->pos;
2826 return S_OK;
2829 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2831 xmlreader *This = impl_from_IXmlReader(iface);
2833 TRACE("(%p)->(%p)\n", This, count);
2835 if (!count) return E_INVALIDARG;
2837 *count = This->attr_count;
2838 return S_OK;
2841 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2843 xmlreader *This = impl_from_IXmlReader(iface);
2844 TRACE("(%p)->(%p)\n", This, depth);
2845 *depth = This->depth;
2846 return S_OK;
2849 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2851 FIXME("(%p): stub\n", iface);
2852 return E_NOTIMPL;
2855 static const struct IXmlReaderVtbl xmlreader_vtbl =
2857 xmlreader_QueryInterface,
2858 xmlreader_AddRef,
2859 xmlreader_Release,
2860 xmlreader_SetInput,
2861 xmlreader_GetProperty,
2862 xmlreader_SetProperty,
2863 xmlreader_Read,
2864 xmlreader_GetNodeType,
2865 xmlreader_MoveToFirstAttribute,
2866 xmlreader_MoveToNextAttribute,
2867 xmlreader_MoveToAttributeByName,
2868 xmlreader_MoveToElement,
2869 xmlreader_GetQualifiedName,
2870 xmlreader_GetNamespaceUri,
2871 xmlreader_GetLocalName,
2872 xmlreader_GetPrefix,
2873 xmlreader_GetValue,
2874 xmlreader_ReadValueChunk,
2875 xmlreader_GetBaseUri,
2876 xmlreader_IsDefault,
2877 xmlreader_IsEmptyElement,
2878 xmlreader_GetLineNumber,
2879 xmlreader_GetLinePosition,
2880 xmlreader_GetAttributeCount,
2881 xmlreader_GetDepth,
2882 xmlreader_IsEOF
2885 /** IXmlReaderInput **/
2886 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2888 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2890 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2892 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2893 IsEqualGUID(riid, &IID_IUnknown))
2895 *ppvObject = iface;
2897 else
2899 WARN("interface %s not implemented\n", debugstr_guid(riid));
2900 *ppvObject = NULL;
2901 return E_NOINTERFACE;
2904 IUnknown_AddRef(iface);
2906 return S_OK;
2909 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2911 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2912 ULONG ref = InterlockedIncrement(&This->ref);
2913 TRACE("(%p)->(%d)\n", This, ref);
2914 return ref;
2917 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2919 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2920 LONG ref = InterlockedDecrement(&This->ref);
2922 TRACE("(%p)->(%d)\n", This, ref);
2924 if (ref == 0)
2926 IMalloc *imalloc = This->imalloc;
2927 if (This->input) IUnknown_Release(This->input);
2928 if (This->stream) ISequentialStream_Release(This->stream);
2929 if (This->buffer) free_input_buffer(This->buffer);
2930 readerinput_free(This, This->baseuri);
2931 readerinput_free(This, This);
2932 if (imalloc) IMalloc_Release(imalloc);
2935 return ref;
2938 static const struct IUnknownVtbl xmlreaderinputvtbl =
2940 xmlreaderinput_QueryInterface,
2941 xmlreaderinput_AddRef,
2942 xmlreaderinput_Release
2945 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2947 xmlreader *reader;
2948 int i;
2950 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2952 if (!IsEqualGUID(riid, &IID_IXmlReader))
2954 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2955 return E_FAIL;
2958 if (imalloc)
2959 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2960 else
2961 reader = heap_alloc(sizeof(*reader));
2962 if(!reader) return E_OUTOFMEMORY;
2964 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2965 reader->ref = 1;
2966 reader->input = NULL;
2967 reader->state = XmlReadState_Closed;
2968 reader->instate = XmlReadInState_Initial;
2969 reader->resumestate = XmlReadResumeState_Initial;
2970 reader->dtdmode = DtdProcessing_Prohibit;
2971 reader->line = reader->pos = 0;
2972 reader->imalloc = imalloc;
2973 if (imalloc) IMalloc_AddRef(imalloc);
2974 reader->nodetype = XmlNodeType_None;
2975 list_init(&reader->attrs);
2976 reader->attr_count = 0;
2977 reader->attr = NULL;
2978 list_init(&reader->elements);
2979 reader->depth = 0;
2980 reader->max_depth = 256;
2981 reader->empty_element = FALSE;
2982 memset(reader->resume, 0, sizeof(reader->resume));
2984 for (i = 0; i < StringValue_Last; i++)
2985 reader->strvalues[i] = strval_empty;
2987 *obj = &reader->IXmlReader_iface;
2989 TRACE("returning iface %p\n", *obj);
2991 return S_OK;
2994 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2995 IMalloc *imalloc,
2996 LPCWSTR encoding,
2997 BOOL hint,
2998 LPCWSTR base_uri,
2999 IXmlReaderInput **ppInput)
3001 xmlreaderinput *readerinput;
3002 HRESULT hr;
3004 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3005 hint, wine_dbgstr_w(base_uri), ppInput);
3007 if (!stream || !ppInput) return E_INVALIDARG;
3009 if (imalloc)
3010 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3011 else
3012 readerinput = heap_alloc(sizeof(*readerinput));
3013 if(!readerinput) return E_OUTOFMEMORY;
3015 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3016 readerinput->ref = 1;
3017 readerinput->imalloc = imalloc;
3018 readerinput->stream = NULL;
3019 if (imalloc) IMalloc_AddRef(imalloc);
3020 readerinput->encoding = parse_encoding_name(encoding, -1);
3021 readerinput->hint = hint;
3022 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3023 readerinput->pending = 0;
3025 hr = alloc_input_buffer(readerinput);
3026 if (hr != S_OK)
3028 readerinput_free(readerinput, readerinput->baseuri);
3029 readerinput_free(readerinput, readerinput);
3030 if (imalloc) IMalloc_Release(imalloc);
3031 return hr;
3033 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3035 *ppInput = &readerinput->IXmlReaderInput_iface;
3037 TRACE("returning iface %p\n", *ppInput);
3039 return S_OK;