wbemprox: Implement Win32_CDROMDrive.MediaType.
[wine/wine-gecko.git] / dlls / xmllite / reader.c
blob6e51d5efe4b9bf5b33cbf55d284958ccc7a50b69
/*
 * IXmlReader implementation
 *
 * Copyright 2010, 2012-2013 Nikolay Sivov
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal, document-level state of the reader. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    /* optional Misc at the end of a document */
    XmlReadInState_MiscEnd,
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records which construct was being parsed when an input problem
   interrupted the reader, so that parsing can be resumed from there. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into xmlreader.resume[]: saved input offsets used to resume
   parsing from a particular input position. */
typedef enum
{
    /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Name = 0,
    /* local for QName */
    XmlReadResume_Local,
    /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Body,
    /* number of slots, not a real index */
    XmlReadResume_Last
} XmlReaderResume;
/* Indices into xmlreader.strvalues[], one per string kept for the current node. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    /* number of slots, not a real index */
    StringValue_Last
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 typedef struct
160 char *data;
161 UINT cur;
162 unsigned int allocated;
163 unsigned int written;
164 } encoded_buffer;
166 typedef struct input_buffer input_buffer;
168 typedef struct
170 IXmlReaderInput IXmlReaderInput_iface;
171 LONG ref;
172 /* reference passed on IXmlReaderInput creation, is kept when input is created */
173 IUnknown *input;
174 IMalloc *imalloc;
175 xml_encoding encoding;
176 BOOL hint;
177 WCHAR *baseuri;
178 /* stream reference set after SetInput() call from reader,
179 stored as sequential stream, cause currently
180 optimizations possible with IStream aren't implemented */
181 ISequentialStream *stream;
182 input_buffer *buffer;
183 unsigned int pending : 1;
184 } xmlreaderinput;
186 static const struct IUnknownVtbl xmlreaderinputvtbl;
188 /* Structure to hold parsed string of specific length.
190 Reader stores node value as 'start' pointer, on request
191 a null-terminated version of it is allocated.
193 To init a strval variable use reader_init_strval(),
194 to set strval as a reader value use reader_set_strval().
196 typedef struct
198 WCHAR *str; /* allocated null-terminated string */
199 UINT len; /* length in WCHARs, altered after ReadValueChunk */
200 UINT start; /* input position where value starts */
201 } strval;
203 static WCHAR emptyW[] = {0};
204 static const strval strval_empty = { emptyW };
206 struct attribute
208 struct list entry;
209 strval localname;
210 strval value;
213 struct element
215 struct list entry;
216 strval qname;
217 strval localname;
220 typedef struct
222 IXmlReader IXmlReader_iface;
223 LONG ref;
224 xmlreaderinput *input;
225 IMalloc *imalloc;
226 XmlReadState state;
227 XmlReaderInternalState instate;
228 XmlReaderResumeState resumestate;
229 XmlNodeType nodetype;
230 DtdProcessing dtdmode;
231 UINT line, pos; /* reader position in XML stream */
232 struct list attrs; /* attributes list for current node */
233 struct attribute *attr; /* current attribute */
234 UINT attr_count;
235 struct list elements;
236 strval strvalues[StringValue_Last];
237 UINT depth;
238 UINT max_depth;
239 BOOL empty_element;
240 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
241 } xmlreader;
243 struct input_buffer
245 encoded_buffer utf16;
246 encoded_buffer encoded;
247 UINT code_page;
248 xmlreaderinput *input;
251 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
253 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
256 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
258 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
261 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
263 if (imalloc)
264 return IMalloc_Realloc(imalloc, mem, len);
265 else
266 return heap_realloc(mem, len);
269 /* reader memory allocation functions */
270 static inline void *reader_alloc(xmlreader *reader, size_t len)
272 return m_alloc(reader->imalloc, len);
275 static inline void reader_free(xmlreader *reader, void *mem)
277 m_free(reader->imalloc, mem);
280 /* Just return pointer from offset, no attempt to read more. */
281 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
283 encoded_buffer *buffer = &reader->input->buffer->utf16;
284 return (WCHAR*)buffer->data + offset;
287 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
289 return v->str ? v->str : reader_get_ptr2(reader, v->start);
292 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
294 *dest = *src;
296 if (src->str != strval_empty.str)
298 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
299 if (!dest->str) return E_OUTOFMEMORY;
300 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
301 dest->str[dest->len] = 0;
302 dest->start = 0;
305 return S_OK;
308 /* reader input memory allocation functions */
309 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
311 return m_alloc(input->imalloc, len);
314 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
316 return m_realloc(input->imalloc, mem, len);
319 static inline void readerinput_free(xmlreaderinput *input, void *mem)
321 m_free(input->imalloc, mem);
324 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
326 LPWSTR ret = NULL;
328 if(str) {
329 DWORD size;
331 size = (strlenW(str)+1)*sizeof(WCHAR);
332 ret = readerinput_alloc(input, size);
333 if (ret) memcpy(ret, str, size);
336 return ret;
339 static void reader_clear_attrs(xmlreader *reader)
341 struct attribute *attr, *attr2;
342 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
344 reader_free(reader, attr);
346 list_init(&reader->attrs);
347 reader->attr_count = 0;
348 reader->attr = NULL;
351 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
352 while we are on a node with attributes */
353 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
355 struct attribute *attr;
357 attr = reader_alloc(reader, sizeof(*attr));
358 if (!attr) return E_OUTOFMEMORY;
360 attr->localname = *localname;
361 attr->value = *value;
362 list_add_tail(&reader->attrs, &attr->entry);
363 reader->attr_count++;
365 return S_OK;
368 /* This one frees stored string value if needed */
369 static void reader_free_strvalued(xmlreader *reader, strval *v)
371 if (v->str != strval_empty.str)
373 reader_free(reader, v->str);
374 *v = strval_empty;
378 /* returns length in WCHARs from 'start' to current buffer offset */
379 static inline UINT reader_get_len(const xmlreader *reader, UINT start)
381 return reader->input->buffer->utf16.cur - start;
384 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
386 v->start = start;
387 v->len = len;
388 v->str = NULL;
391 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
393 return debugstr_wn(reader_get_strptr(reader, v), v->len);
396 /* used to initialize from constant string */
397 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
399 v->start = 0;
400 v->len = len;
401 v->str = str;
404 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
406 reader_free_strvalued(reader, &reader->strvalues[type]);
409 static void reader_free_strvalues(xmlreader *reader)
411 int type;
412 for (type = 0; type < StringValue_Last; type++)
413 reader_free_strvalue(reader, type);
416 /* This helper should only be used to test if strings are the same,
417 it doesn't try to sort. */
418 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
420 if (str1->len != str2->len) return 0;
421 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
424 static void reader_clear_elements(xmlreader *reader)
426 struct element *elem, *elem2;
427 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
429 reader_free_strvalued(reader, &elem->qname);
430 reader_free(reader, elem);
432 list_init(&reader->elements);
433 reader->empty_element = FALSE;
436 static HRESULT reader_inc_depth(xmlreader *reader)
438 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
439 return S_OK;
442 static void reader_dec_depth(xmlreader *reader)
444 if (reader->depth > 1) reader->depth--;
447 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
449 struct element *elem;
450 HRESULT hr;
452 elem = reader_alloc(reader, sizeof(*elem));
453 if (!elem) return E_OUTOFMEMORY;
455 hr = reader_strvaldup(reader, qname, &elem->qname);
456 if (FAILED(hr)) {
457 reader_free(reader, elem);
458 return hr;
461 hr = reader_strvaldup(reader, localname, &elem->localname);
462 if (FAILED(hr))
464 reader_free_strvalued(reader, &elem->qname);
465 reader_free(reader, elem);
466 return hr;
469 if (!list_empty(&reader->elements))
471 hr = reader_inc_depth(reader);
472 if (FAILED(hr)) {
473 reader_free(reader, elem);
474 return hr;
478 list_add_head(&reader->elements, &elem->entry);
479 reader->empty_element = FALSE;
480 return hr;
483 static void reader_pop_element(xmlreader *reader)
485 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
487 if (elem)
489 list_remove(&elem->entry);
490 reader_free_strvalued(reader, &elem->qname);
491 reader_free_strvalued(reader, &elem->localname);
492 reader_free(reader, elem);
493 reader_dec_depth(reader);
497 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
498 means node value is to be determined. */
499 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
501 strval *v = &reader->strvalues[type];
503 reader_free_strvalue(reader, type);
504 if (!value)
506 v->str = NULL;
507 v->start = 0;
508 v->len = 0;
509 return;
512 if (value->str == strval_empty.str)
513 *v = *value;
514 else
516 if (type == StringValue_Value)
518 /* defer allocation for value string */
519 v->str = NULL;
520 v->start = value->start;
521 v->len = value->len;
523 else
525 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
526 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
527 v->str[value->len] = 0;
528 v->len = value->len;
533 static inline int is_reader_pending(xmlreader *reader)
535 return reader->input->pending;
538 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
540 const int initial_len = 0x2000;
541 buffer->data = readerinput_alloc(input, initial_len);
542 if (!buffer->data) return E_OUTOFMEMORY;
544 memset(buffer->data, 0, 4);
545 buffer->cur = 0;
546 buffer->allocated = initial_len;
547 buffer->written = 0;
549 return S_OK;
552 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
554 readerinput_free(input, buffer->data);
557 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
559 if (encoding == XmlEncoding_Unknown)
561 FIXME("unsupported encoding %d\n", encoding);
562 return E_NOTIMPL;
565 *cp = xml_encoding_map[encoding].cp;
567 return S_OK;
570 xml_encoding parse_encoding_name(const WCHAR *name, int len)
572 int min, max, n, c;
574 if (!name) return XmlEncoding_Unknown;
576 min = 0;
577 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
579 while (min <= max)
581 n = (min+max)/2;
583 if (len != -1)
584 c = strncmpiW(xml_encoding_map[n].name, name, len);
585 else
586 c = strcmpiW(xml_encoding_map[n].name, name);
587 if (!c)
588 return xml_encoding_map[n].enc;
590 if (c > 0)
591 max = n-1;
592 else
593 min = n+1;
596 return XmlEncoding_Unknown;
599 static HRESULT alloc_input_buffer(xmlreaderinput *input)
601 input_buffer *buffer;
602 HRESULT hr;
604 input->buffer = NULL;
606 buffer = readerinput_alloc(input, sizeof(*buffer));
607 if (!buffer) return E_OUTOFMEMORY;
609 buffer->input = input;
610 buffer->code_page = ~0; /* code page is unknown at this point */
611 hr = init_encoded_buffer(input, &buffer->utf16);
612 if (hr != S_OK) {
613 readerinput_free(input, buffer);
614 return hr;
617 hr = init_encoded_buffer(input, &buffer->encoded);
618 if (hr != S_OK) {
619 free_encoded_buffer(input, &buffer->utf16);
620 readerinput_free(input, buffer);
621 return hr;
624 input->buffer = buffer;
625 return S_OK;
628 static void free_input_buffer(input_buffer *buffer)
630 free_encoded_buffer(buffer->input, &buffer->encoded);
631 free_encoded_buffer(buffer->input, &buffer->utf16);
632 readerinput_free(buffer->input, buffer);
635 static void readerinput_release_stream(xmlreaderinput *readerinput)
637 if (readerinput->stream) {
638 ISequentialStream_Release(readerinput->stream);
639 readerinput->stream = NULL;
643 /* Queries already stored interface for IStream/ISequentialStream.
644 Interface supplied on creation will be overwritten */
645 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
647 HRESULT hr;
649 readerinput_release_stream(readerinput);
650 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
651 if (hr != S_OK)
652 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
654 return hr;
657 /* reads a chunk to raw buffer */
658 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
660 encoded_buffer *buffer = &readerinput->buffer->encoded;
661 /* to make sure aligned length won't exceed allocated length */
662 ULONG len = buffer->allocated - buffer->written - 4;
663 ULONG read;
664 HRESULT hr;
666 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
667 variable width encodings like UTF-8 */
668 len = (len + 3) & ~3;
669 /* try to use allocated space or grow */
670 if (buffer->allocated - buffer->written < len)
672 buffer->allocated *= 2;
673 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
674 len = buffer->allocated - buffer->written;
677 read = 0;
678 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
679 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
680 readerinput->pending = hr == E_PENDING;
681 if (FAILED(hr)) return hr;
682 buffer->written += read;
684 return hr;
687 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
688 static void readerinput_grow(xmlreaderinput *readerinput, int length)
690 encoded_buffer *buffer = &readerinput->buffer->utf16;
692 length *= sizeof(WCHAR);
693 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
694 if (buffer->allocated < buffer->written + length + 4)
696 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
697 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
698 buffer->allocated = grown_size;
702 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
704 static const char startA[] = {'<','?'};
705 static const char commentA[] = {'<','!'};
706 encoded_buffer *buffer = &readerinput->buffer->encoded;
707 unsigned char *ptr = (unsigned char*)buffer->data;
709 return !memcmp(buffer->data, startA, sizeof(startA)) ||
710 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
711 /* test start byte */
712 (ptr[0] == '<' &&
714 (ptr[1] && (ptr[1] <= 0x7f)) ||
715 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
716 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
717 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
721 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
723 encoded_buffer *buffer = &readerinput->buffer->encoded;
724 static const WCHAR startW[] = {'<','?'};
725 static const WCHAR commentW[] = {'<','!'};
726 static const char utf8bom[] = {0xef,0xbb,0xbf};
727 static const char utf16lebom[] = {0xff,0xfe};
729 *enc = XmlEncoding_Unknown;
731 if (buffer->written <= 3)
733 HRESULT hr = readerinput_growraw(readerinput);
734 if (FAILED(hr)) return hr;
735 if (buffer->written <= 3) return MX_E_INPUTEND;
738 /* try start symbols if we have enough data to do that, input buffer should contain
739 first chunk already */
740 if (readerinput_is_utf8(readerinput))
741 *enc = XmlEncoding_UTF8;
742 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
743 !memcmp(buffer->data, commentW, sizeof(commentW)))
744 *enc = XmlEncoding_UTF16;
745 /* try with BOM now */
746 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
748 buffer->cur += sizeof(utf8bom);
749 *enc = XmlEncoding_UTF8;
751 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
753 buffer->cur += sizeof(utf16lebom);
754 *enc = XmlEncoding_UTF16;
757 return S_OK;
760 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
762 encoded_buffer *buffer = &readerinput->buffer->encoded;
763 int len = buffer->written;
765 /* complete single byte char */
766 if (!(buffer->data[len-1] & 0x80)) return len;
768 /* find start byte of multibyte char */
769 while (--len && !(buffer->data[len] & 0xc0))
772 return len;
775 /* Returns byte length of complete char sequence for buffer code page,
776 it's relative to current buffer position which is currently used for BOM handling
777 only. */
778 static int readerinput_get_convlen(xmlreaderinput *readerinput)
780 encoded_buffer *buffer = &readerinput->buffer->encoded;
781 int len;
783 if (readerinput->buffer->code_page == CP_UTF8)
784 len = readerinput_get_utf8_convlen(readerinput);
785 else
786 len = buffer->written;
788 TRACE("%d\n", len - buffer->cur);
789 return len - buffer->cur;
792 /* It's possible that raw buffer has some leftovers from last conversion - some char
793 sequence that doesn't represent a full code point. Length argument should be calculated with
794 readerinput_get_convlen(), if it's -1 it will be calculated here. */
795 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
797 encoded_buffer *buffer = &readerinput->buffer->encoded;
799 if (len == -1)
800 len = readerinput_get_convlen(readerinput);
802 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
803 /* everything below cur is lost too */
804 buffer->written -= len + buffer->cur;
805 /* after this point we don't need cur offset really,
806 it's used only to mark where actual data begins when first chunk is read */
807 buffer->cur = 0;
810 /* note that raw buffer content is kept */
811 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
813 encoded_buffer *src = &readerinput->buffer->encoded;
814 encoded_buffer *dest = &readerinput->buffer->utf16;
815 int len, dest_len;
816 HRESULT hr;
817 WCHAR *ptr;
818 UINT cp;
820 hr = get_code_page(enc, &cp);
821 if (FAILED(hr)) return;
823 readerinput->buffer->code_page = cp;
824 len = readerinput_get_convlen(readerinput);
826 TRACE("switching to cp %d\n", cp);
828 /* just copy in this case */
829 if (enc == XmlEncoding_UTF16)
831 readerinput_grow(readerinput, len);
832 memcpy(dest->data, src->data + src->cur, len);
833 dest->written += len*sizeof(WCHAR);
834 return;
837 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
838 readerinput_grow(readerinput, dest_len);
839 ptr = (WCHAR*)dest->data;
840 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
841 ptr[dest_len] = 0;
842 dest->written += dest_len*sizeof(WCHAR);
845 /* shrinks parsed data a buffer begins with */
846 static void reader_shrink(xmlreader *reader)
848 encoded_buffer *buffer = &reader->input->buffer->utf16;
850 /* avoid to move too often using threshold shrink length */
851 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
853 buffer->written -= buffer->cur*sizeof(WCHAR);
854 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
855 buffer->cur = 0;
856 *(WCHAR*)&buffer->data[buffer->written] = 0;
860 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
861 It won't attempt to shrink but will grow destination buffer if needed */
862 static HRESULT reader_more(xmlreader *reader)
864 xmlreaderinput *readerinput = reader->input;
865 encoded_buffer *src = &readerinput->buffer->encoded;
866 encoded_buffer *dest = &readerinput->buffer->utf16;
867 UINT cp = readerinput->buffer->code_page;
868 int len, dest_len;
869 HRESULT hr;
870 WCHAR *ptr;
872 /* get some raw data from stream first */
873 hr = readerinput_growraw(readerinput);
874 len = readerinput_get_convlen(readerinput);
876 /* just copy for UTF-16 case */
877 if (cp == ~0)
879 readerinput_grow(readerinput, len);
880 memcpy(dest->data + dest->written, src->data + src->cur, len);
881 dest->written += len*sizeof(WCHAR);
882 return hr;
885 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
886 readerinput_grow(readerinput, dest_len);
887 ptr = (WCHAR*)(dest->data + dest->written);
888 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
889 ptr[dest_len] = 0;
890 dest->written += dest_len*sizeof(WCHAR);
891 /* get rid of processed data */
892 readerinput_shrinkraw(readerinput, len);
894 return hr;
897 static inline UINT reader_get_cur(xmlreader *reader)
899 return reader->input->buffer->utf16.cur;
902 static inline WCHAR *reader_get_ptr(xmlreader *reader)
904 encoded_buffer *buffer = &reader->input->buffer->utf16;
905 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
906 if (!*ptr) reader_more(reader);
907 return (WCHAR*)buffer->data + buffer->cur;
910 static int reader_cmp(xmlreader *reader, const WCHAR *str)
912 const WCHAR *ptr = reader_get_ptr(reader);
913 return strncmpW(str, ptr, strlenW(str));
916 /* moves cursor n WCHARs forward */
917 static void reader_skipn(xmlreader *reader, int n)
919 encoded_buffer *buffer = &reader->input->buffer->utf16;
920 const WCHAR *ptr = reader_get_ptr(reader);
922 while (*ptr++ && n--)
924 buffer->cur++;
925 reader->pos++;
929 static inline BOOL is_wchar_space(WCHAR ch)
931 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
934 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
935 static int reader_skipspaces(xmlreader *reader)
937 encoded_buffer *buffer = &reader->input->buffer->utf16;
938 const WCHAR *ptr = reader_get_ptr(reader);
939 UINT start = reader_get_cur(reader);
941 while (is_wchar_space(*ptr))
943 if (*ptr == '\r')
944 reader->pos = 0;
945 else if (*ptr == '\n')
947 reader->line++;
948 reader->pos = 0;
950 else
951 reader->pos++;
953 buffer->cur++;
954 ptr = reader_get_ptr(reader);
957 return reader_get_cur(reader) - start;
960 /* [26] VersionNum ::= '1.' [0-9]+ */
961 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
963 static const WCHAR onedotW[] = {'1','.',0};
964 WCHAR *ptr, *ptr2;
965 UINT start;
967 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
969 start = reader_get_cur(reader);
970 /* skip "1." */
971 reader_skipn(reader, 2);
973 ptr2 = ptr = reader_get_ptr(reader);
974 while (*ptr >= '0' && *ptr <= '9')
976 reader_skipn(reader, 1);
977 ptr = reader_get_ptr(reader);
980 if (ptr2 == ptr) return WC_E_DIGIT;
981 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
982 TRACE("version=%s\n", debug_strval(reader, val));
983 return S_OK;
986 /* [25] Eq ::= S? '=' S? */
987 static HRESULT reader_parse_eq(xmlreader *reader)
989 static const WCHAR eqW[] = {'=',0};
990 reader_skipspaces(reader);
991 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
992 /* skip '=' */
993 reader_skipn(reader, 1);
994 reader_skipspaces(reader);
995 return S_OK;
998 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
999 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1001 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1002 strval val, name;
1003 HRESULT hr;
1005 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1007 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1008 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1009 /* skip 'version' */
1010 reader_skipn(reader, 7);
1012 hr = reader_parse_eq(reader);
1013 if (FAILED(hr)) return hr;
1015 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1016 return WC_E_QUOTE;
1017 /* skip "'"|'"' */
1018 reader_skipn(reader, 1);
1020 hr = reader_parse_versionnum(reader, &val);
1021 if (FAILED(hr)) return hr;
1023 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1024 return WC_E_QUOTE;
1026 /* skip "'"|'"' */
1027 reader_skipn(reader, 1);
1029 return reader_add_attr(reader, &name, &val);
1032 /* ([A-Za-z0-9._] | '-') */
1033 static inline BOOL is_wchar_encname(WCHAR ch)
1035 return ((ch >= 'A' && ch <= 'Z') ||
1036 (ch >= 'a' && ch <= 'z') ||
1037 (ch >= '0' && ch <= '9') ||
1038 (ch == '.') || (ch == '_') ||
1039 (ch == '-'));
1042 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1043 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1045 WCHAR *start = reader_get_ptr(reader), *ptr;
1046 xml_encoding enc;
1047 int len;
1049 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1050 return WC_E_ENCNAME;
1052 val->start = reader_get_cur(reader);
1054 ptr = start;
1055 while (is_wchar_encname(*++ptr))
1058 len = ptr - start;
1059 enc = parse_encoding_name(start, len);
1060 TRACE("encoding name %s\n", debugstr_wn(start, len));
1061 val->str = start;
1062 val->len = len;
1064 if (enc == XmlEncoding_Unknown)
1065 return WC_E_ENCNAME;
1067 /* skip encoding name */
1068 reader_skipn(reader, len);
1069 return S_OK;
1072 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1073 static HRESULT reader_parse_encdecl(xmlreader *reader)
1075 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1076 strval name, val;
1077 HRESULT hr;
1079 if (!reader_skipspaces(reader)) return S_FALSE;
1081 if (reader_cmp(reader, encodingW)) return S_FALSE;
1082 name.str = reader_get_ptr(reader);
1083 name.start = reader_get_cur(reader);
1084 name.len = 8;
1085 /* skip 'encoding' */
1086 reader_skipn(reader, 8);
1088 hr = reader_parse_eq(reader);
1089 if (FAILED(hr)) return hr;
1091 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1092 return WC_E_QUOTE;
1093 /* skip "'"|'"' */
1094 reader_skipn(reader, 1);
1096 hr = reader_parse_encname(reader, &val);
1097 if (FAILED(hr)) return hr;
1099 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1100 return WC_E_QUOTE;
1102 /* skip "'"|'"' */
1103 reader_skipn(reader, 1);
1105 return reader_add_attr(reader, &name, &val);
1108 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1109 static HRESULT reader_parse_sddecl(xmlreader *reader)
1111 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1112 static const WCHAR yesW[] = {'y','e','s',0};
1113 static const WCHAR noW[] = {'n','o',0};
1114 strval name, val;
1115 UINT start;
1116 HRESULT hr;
1118 if (!reader_skipspaces(reader)) return S_FALSE;
1120 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1121 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1122 /* skip 'standalone' */
1123 reader_skipn(reader, 10);
1125 hr = reader_parse_eq(reader);
1126 if (FAILED(hr)) return hr;
1128 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1129 return WC_E_QUOTE;
1130 /* skip "'"|'"' */
1131 reader_skipn(reader, 1);
1133 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1134 return WC_E_XMLDECL;
1136 start = reader_get_cur(reader);
1137 /* skip 'yes'|'no' */
1138 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1139 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1140 TRACE("standalone=%s\n", debug_strval(reader, &val));
1142 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1143 return WC_E_QUOTE;
1144 /* skip "'"|'"' */
1145 reader_skipn(reader, 1);
1147 return reader_add_attr(reader, &name, &val);
1150 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1151 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1153 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1154 static const WCHAR declcloseW[] = {'?','>',0};
1155 HRESULT hr;
1157 /* check if we have "<?xml " */
1158 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1160 reader_skipn(reader, 5);
1161 hr = reader_parse_versioninfo(reader);
1162 if (FAILED(hr))
1163 return hr;
1165 hr = reader_parse_encdecl(reader);
1166 if (FAILED(hr))
1167 return hr;
1169 hr = reader_parse_sddecl(reader);
1170 if (FAILED(hr))
1171 return hr;
1173 reader_skipspaces(reader);
1174 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1175 reader_skipn(reader, 2);
1177 reader_inc_depth(reader);
1178 reader->nodetype = XmlNodeType_XmlDeclaration;
1179 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1180 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1181 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1183 return S_OK;
1186 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1187 static HRESULT reader_parse_comment(xmlreader *reader)
1189 WCHAR *ptr;
1190 UINT start;
1192 if (reader->resumestate == XmlReadResumeState_Comment)
1194 start = reader->resume[XmlReadResume_Body];
1195 ptr = reader_get_ptr(reader);
1197 else
1199 /* skip '<!--' */
1200 reader_skipn(reader, 4);
1201 reader_shrink(reader);
1202 ptr = reader_get_ptr(reader);
1203 start = reader_get_cur(reader);
1204 reader->nodetype = XmlNodeType_Comment;
1205 reader->resume[XmlReadResume_Body] = start;
1206 reader->resumestate = XmlReadResumeState_Comment;
1207 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1208 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1209 reader_set_strvalue(reader, StringValue_Value, NULL);
1212 /* will exit when there's no more data, it won't attempt to
1213 read more from stream */
1214 while (*ptr)
1216 if (ptr[0] == '-')
1218 if (ptr[1] == '-')
1220 if (ptr[2] == '>')
1222 strval value;
1224 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1225 TRACE("%s\n", debug_strval(reader, &value));
1227 /* skip rest of markup '->' */
1228 reader_skipn(reader, 3);
1230 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1231 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1232 reader_set_strvalue(reader, StringValue_Value, &value);
1233 reader->resume[XmlReadResume_Body] = 0;
1234 reader->resumestate = XmlReadResumeState_Initial;
1235 return S_OK;
1237 else
1238 return WC_E_COMMENT;
1242 reader_skipn(reader, 1);
1243 ptr++;
1246 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 *
 * Tests a single UTF-16 code unit. Code points above #xFFFF arrive as
 * surrogate pairs, so both surrogate ranges are accepted here; pairing is
 * not validated.
 */
static inline BOOL is_char(WCHAR ch)
{
    return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
           (ch >= 0x20 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xe000 && ch <= 0xfffd);
}

/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns TRUE when ch may appear inside a public identifier literal.
 * The apostrophe is part of the set; callers parsing a single-quoted
 * literal have to test for the closing quote themselves.
 */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* contiguous run 0x27..0x3b: '()*+,-./ digits :; */
           (ch >= '\'' && ch <= ';') ||
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}

/* [4] NameStartChar, tested on a single UTF-16 code unit.
 *
 * The [#x10000-#xEFFFF] part of the production is covered by accepting
 * both surrogate ranges; pairing is not validated.
 */
static inline BOOL is_namestartchar(WCHAR ch)
{
    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}

/* [4 NS] NCName ::= Name - (Char* ':' Char*)
 *
 * Returns TRUE when the UTF-16 code unit may appear in an NCName, that is
 * any NameChar except ':'. Surrogates are accepted without pair validation.
 */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0'    && ch <= '9')    ||
           (ch == 0xb7)                   ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x300  && ch <= 0x36f)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}

1317 static inline BOOL is_namechar(WCHAR ch)
1319 return (ch == ':') || is_ncnamechar(ch);
1322 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1324 /* When we're on attribute always return attribute type, container node type is kept.
1325 Note that container is not necessarily an element, and attribute doesn't mean it's
1326 an attribute in XML spec terms. */
1327 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1330 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1331 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1332 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1333 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1334 [5] Name ::= NameStartChar (NameChar)* */
1335 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1337 WCHAR *ptr;
1338 UINT start;
1340 if (reader->resume[XmlReadResume_Name])
1342 start = reader->resume[XmlReadResume_Name];
1343 ptr = reader_get_ptr(reader);
1345 else
1347 ptr = reader_get_ptr(reader);
1348 start = reader_get_cur(reader);
1349 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1352 while (is_namechar(*ptr))
1354 reader_skipn(reader, 1);
1355 ptr = reader_get_ptr(reader);
1358 if (is_reader_pending(reader))
1360 reader->resume[XmlReadResume_Name] = start;
1361 return E_PENDING;
1363 else
1364 reader->resume[XmlReadResume_Name] = 0;
1366 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1367 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1369 return S_OK;
1372 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1373 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1375 static const WCHAR xmlW[] = {'x','m','l'};
1376 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1377 strval name;
1378 WCHAR *ptr;
1379 HRESULT hr;
1380 UINT i;
1382 hr = reader_parse_name(reader, &name);
1383 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1385 /* now that we got name check for illegal content */
1386 if (strval_eq(reader, &name, &xmlval))
1387 return WC_E_LEADINGXML;
1389 /* PITarget can't be a qualified name */
1390 ptr = reader_get_strptr(reader, &name);
1391 for (i = 0; i < name.len; i++)
1392 if (ptr[i] == ':')
1393 return i ? NC_E_NAMECOLON : WC_E_PI;
1395 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1396 *target = name;
1397 return S_OK;
1400 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1401 static HRESULT reader_parse_pi(xmlreader *reader)
1403 strval target;
1404 WCHAR *ptr;
1405 UINT start;
1406 HRESULT hr;
1408 switch (reader->resumestate)
1410 case XmlReadResumeState_Initial:
1411 /* skip '<?' */
1412 reader_skipn(reader, 2);
1413 reader_shrink(reader);
1414 reader->resumestate = XmlReadResumeState_PITarget;
1415 case XmlReadResumeState_PITarget:
1416 hr = reader_parse_pitarget(reader, &target);
1417 if (FAILED(hr)) return hr;
1418 reader_set_strvalue(reader, StringValue_LocalName, &target);
1419 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1420 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1421 reader->resumestate = XmlReadResumeState_PIBody;
1422 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1423 default:
1427 start = reader->resume[XmlReadResume_Body];
1428 ptr = reader_get_ptr(reader);
1429 while (*ptr)
1431 if (ptr[0] == '?')
1433 if (ptr[1] == '>')
1435 UINT cur = reader_get_cur(reader);
1436 strval value;
1438 /* strip all leading whitespace chars */
1439 while (start < cur)
1441 ptr = reader_get_ptr2(reader, start);
1442 if (!is_wchar_space(*ptr)) break;
1443 start++;
1446 reader_init_strvalue(start, cur-start, &value);
1448 /* skip '?>' */
1449 reader_skipn(reader, 2);
1450 TRACE("%s\n", debug_strval(reader, &value));
1451 reader->nodetype = XmlNodeType_ProcessingInstruction;
1452 reader->resumestate = XmlReadResumeState_Initial;
1453 reader->resume[XmlReadResume_Body] = 0;
1454 reader_set_strvalue(reader, StringValue_Value, &value);
1455 return S_OK;
1459 reader_skipn(reader, 1);
1460 ptr = reader_get_ptr(reader);
1463 return S_OK;
1466 /* This one is used to parse significant whitespace nodes, like in Misc production */
1467 static HRESULT reader_parse_whitespace(xmlreader *reader)
1469 switch (reader->resumestate)
1471 case XmlReadResumeState_Initial:
1472 reader_shrink(reader);
1473 reader->resumestate = XmlReadResumeState_Whitespace;
1474 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1475 reader->nodetype = XmlNodeType_Whitespace;
1476 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1477 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1478 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1479 /* fallthrough */
1480 case XmlReadResumeState_Whitespace:
1482 strval value;
1483 UINT start;
1485 reader_skipspaces(reader);
1486 if (is_reader_pending(reader)) return S_OK;
1488 start = reader->resume[XmlReadResume_Body];
1489 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1490 reader_set_strvalue(reader, StringValue_Value, &value);
1491 TRACE("%s\n", debug_strval(reader, &value));
1492 reader->resumestate = XmlReadResumeState_Initial;
1494 default:
1498 return S_OK;
1501 /* [27] Misc ::= Comment | PI | S */
1502 static HRESULT reader_parse_misc(xmlreader *reader)
1504 HRESULT hr = S_FALSE;
1506 if (reader->resumestate != XmlReadResumeState_Initial)
1508 hr = reader_more(reader);
1509 if (FAILED(hr)) return hr;
1511 /* finish current node */
1512 switch (reader->resumestate)
1514 case XmlReadResumeState_PITarget:
1515 case XmlReadResumeState_PIBody:
1516 return reader_parse_pi(reader);
1517 case XmlReadResumeState_Comment:
1518 return reader_parse_comment(reader);
1519 case XmlReadResumeState_Whitespace:
1520 return reader_parse_whitespace(reader);
1521 default:
1522 ERR("unknown resume state %d\n", reader->resumestate);
1526 while (1)
1528 const WCHAR *cur = reader_get_ptr(reader);
1530 if (is_wchar_space(*cur))
1531 hr = reader_parse_whitespace(reader);
1532 else if (!reader_cmp(reader, commentW))
1533 hr = reader_parse_comment(reader);
1534 else if (!reader_cmp(reader, piW))
1535 hr = reader_parse_pi(reader);
1536 else
1537 break;
1539 if (hr != S_FALSE) return hr;
1542 return hr;
1545 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1546 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1548 WCHAR *cur = reader_get_ptr(reader), quote;
1549 UINT start;
1551 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1553 quote = *cur;
1554 reader_skipn(reader, 1);
1556 cur = reader_get_ptr(reader);
1557 start = reader_get_cur(reader);
1558 while (is_char(*cur) && *cur != quote)
1560 reader_skipn(reader, 1);
1561 cur = reader_get_ptr(reader);
1563 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1564 if (*cur == quote) reader_skipn(reader, 1);
1566 TRACE("%s\n", debug_strval(reader, literal));
1567 return S_OK;
1570 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1571 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1572 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1574 WCHAR *cur = reader_get_ptr(reader), quote;
1575 UINT start;
1577 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1579 quote = *cur;
1580 reader_skipn(reader, 1);
1582 start = reader_get_cur(reader);
1583 cur = reader_get_ptr(reader);
1584 while (is_pubchar(*cur) && *cur != quote)
1586 reader_skipn(reader, 1);
1587 cur = reader_get_ptr(reader);
1590 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1591 TRACE("%s\n", debug_strval(reader, literal));
1592 return S_OK;
1595 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1596 static HRESULT reader_parse_externalid(xmlreader *reader)
1598 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1599 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1600 strval name;
1601 HRESULT hr;
1602 int cnt;
1604 if (reader_cmp(reader, systemW))
1606 if (reader_cmp(reader, publicW))
1607 return S_FALSE;
1608 else
1610 strval pub;
1612 /* public id */
1613 reader_skipn(reader, 6);
1614 cnt = reader_skipspaces(reader);
1615 if (!cnt) return WC_E_WHITESPACE;
1617 hr = reader_parse_pub_literal(reader, &pub);
1618 if (FAILED(hr)) return hr;
1620 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1621 return reader_add_attr(reader, &name, &pub);
1624 else
1626 strval sys;
1628 /* system id */
1629 reader_skipn(reader, 6);
1630 cnt = reader_skipspaces(reader);
1631 if (!cnt) return WC_E_WHITESPACE;
1633 hr = reader_parse_sys_literal(reader, &sys);
1634 if (FAILED(hr)) return hr;
1636 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1637 return reader_add_attr(reader, &name, &sys);
1640 return hr;
1643 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1644 static HRESULT reader_parse_dtd(xmlreader *reader)
1646 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1647 strval name;
1648 WCHAR *cur;
1649 HRESULT hr;
1651 /* check if we have "<!DOCTYPE" */
1652 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1653 reader_shrink(reader);
1655 /* DTD processing is not allowed by default */
1656 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1658 reader_skipn(reader, 9);
1659 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1661 /* name */
1662 hr = reader_parse_name(reader, &name);
1663 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1665 reader_skipspaces(reader);
1667 hr = reader_parse_externalid(reader);
1668 if (FAILED(hr)) return hr;
1670 reader_skipspaces(reader);
1672 cur = reader_get_ptr(reader);
1673 if (*cur != '>')
1675 FIXME("internal subset parsing not implemented\n");
1676 return E_NOTIMPL;
1679 /* skip '>' */
1680 reader_skipn(reader, 1);
1682 reader->nodetype = XmlNodeType_DocumentType;
1683 reader_set_strvalue(reader, StringValue_LocalName, &name);
1684 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1686 return S_OK;
1689 /* [11 NS] LocalPart ::= NCName */
1690 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1692 WCHAR *ptr;
1693 UINT start;
1695 if (reader->resume[XmlReadResume_Local])
1697 start = reader->resume[XmlReadResume_Local];
1698 ptr = reader_get_ptr(reader);
1700 else
1702 ptr = reader_get_ptr(reader);
1703 start = reader_get_cur(reader);
1706 while (is_ncnamechar(*ptr))
1708 reader_skipn(reader, 1);
1709 ptr = reader_get_ptr(reader);
1712 if (is_reader_pending(reader))
1714 reader->resume[XmlReadResume_Local] = start;
1715 return E_PENDING;
1717 else
1718 reader->resume[XmlReadResume_Local] = 0;
1720 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1722 return S_OK;
1725 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1726 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1727 [9 NS] UnprefixedName ::= LocalPart
1728 [10 NS] Prefix ::= NCName */
1729 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1731 WCHAR *ptr;
1732 UINT start;
1733 HRESULT hr;
1735 if (reader->resume[XmlReadResume_Name])
1737 start = reader->resume[XmlReadResume_Name];
1738 ptr = reader_get_ptr(reader);
1740 else
1742 ptr = reader_get_ptr(reader);
1743 start = reader_get_cur(reader);
1744 reader->resume[XmlReadResume_Name] = start;
1745 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1748 if (reader->resume[XmlReadResume_Local])
1750 hr = reader_parse_local(reader, local);
1751 if (FAILED(hr)) return hr;
1753 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1754 local->start - reader->resume[XmlReadResume_Name] - 1,
1755 prefix);
1757 else
1759 /* skip prefix part */
1760 while (is_ncnamechar(*ptr))
1762 reader_skipn(reader, 1);
1763 ptr = reader_get_ptr(reader);
1766 if (is_reader_pending(reader)) return E_PENDING;
1768 /* got a qualified name */
1769 if (*ptr == ':')
1771 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1773 /* skip ':' */
1774 reader_skipn(reader, 1);
1775 hr = reader_parse_local(reader, local);
1776 if (FAILED(hr)) return hr;
1778 else
1780 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1781 reader_init_strvalue(0, 0, prefix);
1785 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1787 if (prefix->len)
1788 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1789 else
1790 TRACE("ncname %s\n", debug_strval(reader, local));
1792 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1793 /* count ':' too */
1794 (prefix->len ? prefix->len + 1 : 0) + local->len,
1795 qname);
1797 reader->resume[XmlReadResume_Name] = 0;
1798 reader->resume[XmlReadResume_Local] = 0;
1800 return S_OK;
1803 /* Applies normalization rules to a single char, used for attribute values.
1805 Rules include 2 steps:
1807 1) replacing \r\n with a single \n;
1808 2) replacing all whitespace chars with ' '.
1811 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1813 encoded_buffer *buffer = &reader->input->buffer->utf16;
1815 if (!is_wchar_space(*ptr)) return;
1817 if (*ptr == '\r' && *(ptr+1) == '\n')
1819 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1820 memmove(ptr+1, ptr+2, len);
1822 *ptr = ' ';
1825 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1827 static const WCHAR entltW[] = {'l','t'};
1828 static const WCHAR entgtW[] = {'g','t'};
1829 static const WCHAR entampW[] = {'a','m','p'};
1830 static const WCHAR entaposW[] = {'a','p','o','s'};
1831 static const WCHAR entquotW[] = {'q','u','o','t'};
1832 static const strval lt = { (WCHAR*)entltW, 2 };
1833 static const strval gt = { (WCHAR*)entgtW, 2 };
1834 static const strval amp = { (WCHAR*)entampW, 3 };
1835 static const strval apos = { (WCHAR*)entaposW, 4 };
1836 static const strval quot = { (WCHAR*)entquotW, 4 };
1837 WCHAR *str = reader_get_strptr(reader, name);
1839 switch (*str)
1841 case 'l':
1842 if (strval_eq(reader, name, &lt)) return '<';
1843 break;
1844 case 'g':
1845 if (strval_eq(reader, name, &gt)) return '>';
1846 break;
1847 case 'a':
1848 if (strval_eq(reader, name, &amp))
1849 return '&';
1850 else if (strval_eq(reader, name, &apos))
1851 return '\'';
1852 break;
1853 case 'q':
1854 if (strval_eq(reader, name, &quot)) return '\"';
1855 break;
1856 default:
1860 return 0;
1863 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1864 [67] Reference ::= EntityRef | CharRef
1865 [68] EntityRef ::= '&' Name ';' */
1866 static HRESULT reader_parse_reference(xmlreader *reader)
1868 encoded_buffer *buffer = &reader->input->buffer->utf16;
1869 WCHAR *start = reader_get_ptr(reader), *ptr;
1870 UINT cur = reader_get_cur(reader);
1871 WCHAR ch = 0;
1872 int len;
1874 /* skip '&' */
1875 reader_skipn(reader, 1);
1876 ptr = reader_get_ptr(reader);
1878 if (*ptr == '#')
1880 reader_skipn(reader, 1);
1881 ptr = reader_get_ptr(reader);
1883 /* hex char or decimal */
1884 if (*ptr == 'x')
1886 reader_skipn(reader, 1);
1887 ptr = reader_get_ptr(reader);
1889 while (*ptr != ';')
1891 if ((*ptr >= '0' && *ptr <= '9'))
1892 ch = ch*16 + *ptr - '0';
1893 else if ((*ptr >= 'a' && *ptr <= 'f'))
1894 ch = ch*16 + *ptr - 'a' + 10;
1895 else if ((*ptr >= 'A' && *ptr <= 'F'))
1896 ch = ch*16 + *ptr - 'A' + 10;
1897 else
1898 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1899 reader_skipn(reader, 1);
1900 ptr = reader_get_ptr(reader);
1903 else
1905 while (*ptr != ';')
1907 if ((*ptr >= '0' && *ptr <= '9'))
1909 ch = ch*10 + *ptr - '0';
1910 reader_skipn(reader, 1);
1911 ptr = reader_get_ptr(reader);
1913 else
1914 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1918 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1920 /* normalize */
1921 if (is_wchar_space(ch)) ch = ' ';
1923 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1924 memmove(start+1, ptr+1, len);
1925 buffer->cur = cur + 1;
1927 *start = ch;
1929 else
1931 strval name;
1932 HRESULT hr;
1934 hr = reader_parse_name(reader, &name);
1935 if (FAILED(hr)) return hr;
1937 ptr = reader_get_ptr(reader);
1938 if (*ptr != ';') return WC_E_SEMICOLON;
1940 /* predefined entities resolve to a single character */
1941 ch = get_predefined_entity(reader, &name);
1942 if (ch)
1944 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1945 memmove(start+1, ptr+1, len);
1946 buffer->cur = cur + 1;
1948 *start = ch;
1950 else
1952 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1953 return WC_E_UNDECLAREDENTITY;
1958 return S_OK;
1961 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1962 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1964 WCHAR *ptr, quote;
1965 UINT start;
1967 ptr = reader_get_ptr(reader);
1969 /* skip opening quote */
1970 quote = *ptr;
1971 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1972 reader_skipn(reader, 1);
1974 ptr = reader_get_ptr(reader);
1975 start = reader_get_cur(reader);
1976 while (*ptr)
1978 if (*ptr == '<') return WC_E_LESSTHAN;
1980 if (*ptr == quote)
1982 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
1983 /* skip closing quote */
1984 reader_skipn(reader, 1);
1985 return S_OK;
1988 if (*ptr == '&')
1990 HRESULT hr = reader_parse_reference(reader);
1991 if (FAILED(hr)) return hr;
1993 else
1995 reader_normalize_space(reader, ptr);
1996 reader_skipn(reader, 1);
1998 ptr = reader_get_ptr(reader);
2001 return WC_E_QUOTE;
2004 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2005 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2006 [3 NS] DefaultAttName ::= 'xmlns'
2007 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2008 static HRESULT reader_parse_attribute(xmlreader *reader)
2010 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
2011 strval prefix, local, qname, xmlns, value;
2012 HRESULT hr;
2014 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2015 if (FAILED(hr)) return hr;
2017 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
2019 if (strval_eq(reader, &prefix, &xmlns))
2021 FIXME("namespace definitions not supported\n");
2022 return E_NOTIMPL;
2025 if (strval_eq(reader, &qname, &xmlns))
2026 FIXME("default namespace definitions not supported\n");
2028 hr = reader_parse_eq(reader);
2029 if (FAILED(hr)) return hr;
2031 hr = reader_parse_attvalue(reader, &value);
2032 if (FAILED(hr)) return hr;
2034 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2035 return reader_add_attr(reader, &local, &value);
2038 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2039 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2040 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2042 HRESULT hr;
2044 hr = reader_parse_qname(reader, prefix, local, qname);
2045 if (FAILED(hr)) return hr;
2047 while (1)
2049 static const WCHAR endW[] = {'/','>',0};
2051 reader_skipspaces(reader);
2053 /* empty element */
2054 if ((*empty = !reader_cmp(reader, endW)))
2056 /* skip '/>' */
2057 reader_skipn(reader, 2);
2058 reader->empty_element = TRUE;
2059 return S_OK;
2062 /* got a start tag */
2063 if (!reader_cmp(reader, gtW))
2065 /* skip '>' */
2066 reader_skipn(reader, 1);
2067 return reader_push_element(reader, qname, local);
2070 hr = reader_parse_attribute(reader);
2071 if (FAILED(hr)) return hr;
2074 return S_OK;
2077 /* [39] element ::= EmptyElemTag | STag content ETag */
2078 static HRESULT reader_parse_element(xmlreader *reader)
2080 HRESULT hr;
2082 switch (reader->resumestate)
2084 case XmlReadResumeState_Initial:
2085 /* check if we are really on element */
2086 if (reader_cmp(reader, ltW)) return S_FALSE;
2088 /* skip '<' */
2089 reader_skipn(reader, 1);
2091 reader_shrink(reader);
2092 reader->resumestate = XmlReadResumeState_STag;
2093 case XmlReadResumeState_STag:
2095 strval qname, prefix, local;
2096 int empty = 0;
2098 /* this handles empty elements too */
2099 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2100 if (FAILED(hr)) return hr;
2102 /* FIXME: need to check for defined namespace to reject invalid prefix,
2103 currently reject all prefixes */
2104 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2106 /* if we got empty element and stack is empty go straight to Misc */
2107 if (empty && list_empty(&reader->elements))
2108 reader->instate = XmlReadInState_MiscEnd;
2109 else
2110 reader->instate = XmlReadInState_Content;
2112 reader->nodetype = XmlNodeType_Element;
2113 reader->resumestate = XmlReadResumeState_Initial;
2114 reader_set_strvalue(reader, StringValue_LocalName, &local);
2115 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2116 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2117 break;
2119 default:
2120 hr = E_FAIL;
2123 return hr;
2126 /* [13 NS] ETag ::= '</' QName S? '>' */
2127 static HRESULT reader_parse_endtag(xmlreader *reader)
2129 strval prefix, local, qname;
2130 struct element *elem;
2131 HRESULT hr;
2133 /* skip '</' */
2134 reader_skipn(reader, 2);
2136 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2137 if (FAILED(hr)) return hr;
2139 reader_skipspaces(reader);
2141 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2143 /* skip '>' */
2144 reader_skipn(reader, 1);
2146 /* Element stack should never be empty at this point, cause we shouldn't get to
2147 content parsing if it's empty. */
2148 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2149 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2151 reader_pop_element(reader);
2153 /* It was a root element, the rest is expected as Misc */
2154 if (list_empty(&reader->elements))
2155 reader->instate = XmlReadInState_MiscEnd;
2157 reader->nodetype = XmlNodeType_EndElement;
2158 reader_set_strvalue(reader, StringValue_LocalName, &local);
2159 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2161 return S_OK;
2164 /* [18] CDSect ::= CDStart CData CDEnd
2165 [19] CDStart ::= '<![CDATA['
2166 [20] CData ::= (Char* - (Char* ']]>' Char*))
2167 [21] CDEnd ::= ']]>' */
2168 static HRESULT reader_parse_cdata(xmlreader *reader)
2170 WCHAR *ptr;
2171 UINT start;
2173 if (reader->resumestate == XmlReadResumeState_CDATA)
2175 start = reader->resume[XmlReadResume_Body];
2176 ptr = reader_get_ptr(reader);
2178 else
2180 /* skip markup '<![CDATA[' */
2181 reader_skipn(reader, 9);
2182 reader_shrink(reader);
2183 ptr = reader_get_ptr(reader);
2184 start = reader_get_cur(reader);
2185 reader->nodetype = XmlNodeType_CDATA;
2186 reader->resume[XmlReadResume_Body] = start;
2187 reader->resumestate = XmlReadResumeState_CDATA;
2188 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2189 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2190 reader_set_strvalue(reader, StringValue_Value, NULL);
2193 while (*ptr)
2195 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2197 strval value;
2199 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2201 /* skip ']]>' */
2202 reader_skipn(reader, 3);
2203 TRACE("%s\n", debug_strval(reader, &value));
2205 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2206 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2207 reader_set_strvalue(reader, StringValue_Value, &value);
2208 reader->resume[XmlReadResume_Body] = 0;
2209 reader->resumestate = XmlReadResumeState_Initial;
2210 return S_OK;
2212 else
2214 /* Value normalization is not fully implemented, rules are:
2216 - single '\r' -> '\n';
2217 - sequence '\r\n' -> '\n', in this case value length changes;
2219 if (*ptr == '\r') *ptr = '\n';
2220 reader_skipn(reader, 1);
2221 ptr++;
2225 return S_OK;
2228 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2229 static HRESULT reader_parse_chardata(xmlreader *reader)
2231 WCHAR *ptr;
2232 UINT start;
2234 if (reader->resumestate == XmlReadResumeState_CharData)
2236 start = reader->resume[XmlReadResume_Body];
2237 ptr = reader_get_ptr(reader);
2239 else
2241 reader_shrink(reader);
2242 ptr = reader_get_ptr(reader);
2243 start = reader_get_cur(reader);
2244 /* There's no text */
2245 if (!*ptr || *ptr == '<') return S_OK;
2246 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2247 reader->resume[XmlReadResume_Body] = start;
2248 reader->resumestate = XmlReadResumeState_CharData;
2249 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2250 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2251 reader_set_strvalue(reader, StringValue_Value, NULL);
2254 while (*ptr)
2256 /* CDATA closing sequence ']]>' is not allowed */
2257 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2258 return WC_E_CDSECTEND;
2260 /* Found next markup part */
2261 if (ptr[0] == '<')
2263 strval value;
2265 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2266 reader_set_strvalue(reader, StringValue_Value, &value);
2267 reader->resume[XmlReadResume_Body] = 0;
2268 reader->resumestate = XmlReadResumeState_Initial;
2269 return S_OK;
2272 reader_skipn(reader, 1);
2274 /* this covers a case when text has leading whitespace chars */
2275 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2276 ptr++;
2279 return S_OK;
2282 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2283 static HRESULT reader_parse_content(xmlreader *reader)
2285 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2286 static const WCHAR etagW[] = {'<','/',0};
2287 static const WCHAR ampW[] = {'&',0};
2289 if (reader->resumestate != XmlReadResumeState_Initial)
2291 switch (reader->resumestate)
2293 case XmlReadResumeState_CDATA:
2294 return reader_parse_cdata(reader);
2295 case XmlReadResumeState_Comment:
2296 return reader_parse_comment(reader);
2297 case XmlReadResumeState_PIBody:
2298 case XmlReadResumeState_PITarget:
2299 return reader_parse_pi(reader);
2300 case XmlReadResumeState_CharData:
2301 return reader_parse_chardata(reader);
2302 default:
2303 ERR("unknown resume state %d\n", reader->resumestate);
2307 reader_shrink(reader);
2309 /* handle end tag here, it indicates end of content as well */
2310 if (!reader_cmp(reader, etagW))
2311 return reader_parse_endtag(reader);
2313 if (!reader_cmp(reader, commentW))
2314 return reader_parse_comment(reader);
2316 if (!reader_cmp(reader, piW))
2317 return reader_parse_pi(reader);
2319 if (!reader_cmp(reader, cdstartW))
2320 return reader_parse_cdata(reader);
2322 if (!reader_cmp(reader, ampW))
2323 return reader_parse_reference(reader);
2325 if (!reader_cmp(reader, ltW))
2326 return reader_parse_element(reader);
2328 /* what's left must be CharData */
2329 return reader_parse_chardata(reader);
2332 static HRESULT reader_parse_nextnode(xmlreader *reader)
2334 HRESULT hr;
2336 if (!is_reader_pending(reader))
2337 reader_clear_attrs(reader);
2339 while (1)
2341 switch (reader->instate)
2343 /* if it's a first call for a new input we need to detect stream encoding */
2344 case XmlReadInState_Initial:
2346 xml_encoding enc;
2348 hr = readerinput_growraw(reader->input);
2349 if (FAILED(hr)) return hr;
2351 /* try to detect encoding by BOM or data and set input code page */
2352 hr = readerinput_detectencoding(reader->input, &enc);
2353 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2354 if (FAILED(hr)) return hr;
2356 /* always switch first time cause we have to put something in */
2357 readerinput_switchencoding(reader->input, enc);
2359 /* parse xml declaration */
2360 hr = reader_parse_xmldecl(reader);
2361 if (FAILED(hr)) return hr;
2363 readerinput_shrinkraw(reader->input, -1);
2364 reader->instate = XmlReadInState_Misc_DTD;
2365 if (hr == S_OK) return hr;
2367 break;
2368 case XmlReadInState_Misc_DTD:
2369 hr = reader_parse_misc(reader);
2370 if (FAILED(hr)) return hr;
2372 if (hr == S_FALSE)
2373 reader->instate = XmlReadInState_DTD;
2374 else
2375 return hr;
2376 break;
2377 case XmlReadInState_DTD:
2378 hr = reader_parse_dtd(reader);
2379 if (FAILED(hr)) return hr;
2381 if (hr == S_OK)
2383 reader->instate = XmlReadInState_DTD_Misc;
2384 return hr;
2386 else
2387 reader->instate = XmlReadInState_Element;
2388 break;
2389 case XmlReadInState_DTD_Misc:
2390 hr = reader_parse_misc(reader);
2391 if (FAILED(hr)) return hr;
2393 if (hr == S_FALSE)
2394 reader->instate = XmlReadInState_Element;
2395 else
2396 return hr;
2397 break;
2398 case XmlReadInState_Element:
2399 return reader_parse_element(reader);
2400 case XmlReadInState_Content:
2401 return reader_parse_content(reader);
2402 case XmlReadInState_MiscEnd:
2403 hr = reader_parse_misc(reader);
2404 if (FAILED(hr)) return hr;
2406 if (hr == S_FALSE)
2407 reader->instate = XmlReadInState_Eof;
2408 return hr;
2409 case XmlReadInState_Eof:
2410 return S_FALSE;
2411 default:
2412 FIXME("internal state %d not handled\n", reader->instate);
2413 return E_NOTIMPL;
2417 return E_NOTIMPL;
2420 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2422 xmlreader *This = impl_from_IXmlReader(iface);
2424 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2426 if (IsEqualGUID(riid, &IID_IUnknown) ||
2427 IsEqualGUID(riid, &IID_IXmlReader))
2429 *ppvObject = iface;
2431 else
2433 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2434 *ppvObject = NULL;
2435 return E_NOINTERFACE;
2438 IXmlReader_AddRef(iface);
2440 return S_OK;
2443 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2445 xmlreader *This = impl_from_IXmlReader(iface);
2446 ULONG ref = InterlockedIncrement(&This->ref);
2447 TRACE("(%p)->(%d)\n", This, ref);
2448 return ref;
2451 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2453 xmlreader *This = impl_from_IXmlReader(iface);
2454 LONG ref = InterlockedDecrement(&This->ref);
2456 TRACE("(%p)->(%d)\n", This, ref);
2458 if (ref == 0)
2460 IMalloc *imalloc = This->imalloc;
2461 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2462 reader_clear_attrs(This);
2463 reader_clear_elements(This);
2464 reader_free_strvalues(This);
2465 reader_free(This, This);
2466 if (imalloc) IMalloc_Release(imalloc);
2469 return ref;
2472 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2474 xmlreader *This = impl_from_IXmlReader(iface);
2475 IXmlReaderInput *readerinput;
2476 HRESULT hr;
2478 TRACE("(%p)->(%p)\n", This, input);
2480 if (This->input)
2482 readerinput_release_stream(This->input);
2483 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2484 This->input = NULL;
2487 This->line = This->pos = 0;
2488 reader_clear_elements(This);
2489 This->depth = 0;
2490 This->resumestate = XmlReadResumeState_Initial;
2491 memset(This->resume, 0, sizeof(This->resume));
2493 /* just reset current input */
2494 if (!input)
2496 This->state = XmlReadState_Initial;
2497 return S_OK;
2500 /* now try IXmlReaderInput, ISequentialStream, IStream */
2501 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2502 if (hr == S_OK)
2504 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2505 This->input = impl_from_IXmlReaderInput(readerinput);
2506 else
2508 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2509 readerinput, readerinput->lpVtbl);
2510 IUnknown_Release(readerinput);
2511 return E_FAIL;
2516 if (hr != S_OK || !readerinput)
2518 /* create IXmlReaderInput basing on supplied interface */
2519 hr = CreateXmlReaderInputWithEncodingName(input,
2520 NULL, NULL, FALSE, NULL, &readerinput);
2521 if (hr != S_OK) return hr;
2522 This->input = impl_from_IXmlReaderInput(readerinput);
2525 /* set stream for supplied IXmlReaderInput */
2526 hr = readerinput_query_for_stream(This->input);
2527 if (hr == S_OK)
2529 This->state = XmlReadState_Initial;
2530 This->instate = XmlReadInState_Initial;
2533 return hr;
2536 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2538 xmlreader *This = impl_from_IXmlReader(iface);
2540 TRACE("(%p)->(%s %p)\n", This, debugstr_prop(property), value);
2542 if (!value) return E_INVALIDARG;
2544 switch (property)
2546 case XmlReaderProperty_DtdProcessing:
2547 *value = This->dtdmode;
2548 break;
2549 case XmlReaderProperty_ReadState:
2550 *value = This->state;
2551 break;
2552 default:
2553 FIXME("Unimplemented property (%u)\n", property);
2554 return E_NOTIMPL;
2557 return S_OK;
2560 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2562 xmlreader *This = impl_from_IXmlReader(iface);
2564 TRACE("(%p)->(%s %lu)\n", This, debugstr_prop(property), value);
2566 switch (property)
2568 case XmlReaderProperty_DtdProcessing:
2569 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2570 This->dtdmode = value;
2571 break;
2572 default:
2573 FIXME("Unimplemented property (%u)\n", property);
2574 return E_NOTIMPL;
2577 return S_OK;
2580 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2582 xmlreader *This = impl_from_IXmlReader(iface);
2583 XmlNodeType oldtype = This->nodetype;
2584 HRESULT hr;
2586 TRACE("(%p)->(%p)\n", This, nodetype);
2588 if (This->state == XmlReadState_Closed) return S_FALSE;
2590 hr = reader_parse_nextnode(This);
2591 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2592 This->state = XmlReadState_Interactive;
2593 if (hr == S_OK)
2595 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2596 *nodetype = This->nodetype;
2599 return hr;
2602 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2604 xmlreader *This = impl_from_IXmlReader(iface);
2605 TRACE("(%p)->(%p)\n", This, node_type);
2607 *node_type = reader_get_nodetype(This);
2608 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2611 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2613 xmlreader *This = impl_from_IXmlReader(iface);
2615 TRACE("(%p)\n", This);
2617 if (!This->attr_count) return S_FALSE;
2618 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2619 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2620 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2622 return S_OK;
2625 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2627 xmlreader *This = impl_from_IXmlReader(iface);
2628 const struct list *next;
2630 TRACE("(%p)\n", This);
2632 if (!This->attr_count) return S_FALSE;
2634 if (!This->attr)
2635 return IXmlReader_MoveToFirstAttribute(iface);
2637 next = list_next(&This->attrs, &This->attr->entry);
2638 if (next)
2640 This->attr = LIST_ENTRY(next, struct attribute, entry);
2641 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2642 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2645 return next ? S_OK : S_FALSE;
2648 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2649 LPCWSTR local_name,
2650 LPCWSTR namespaceUri)
2652 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2653 return E_NOTIMPL;
2656 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2658 xmlreader *This = impl_from_IXmlReader(iface);
2659 struct element *elem;
2661 TRACE("(%p)\n", This);
2663 if (!This->attr_count) return S_FALSE;
2664 This->attr = NULL;
2666 /* FIXME: support other node types with 'attributes' like DTD */
2667 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2668 if (elem)
2670 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2671 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2674 return S_OK;
2677 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2679 xmlreader *This = impl_from_IXmlReader(iface);
2681 TRACE("(%p)->(%p %p)\n", This, name, len);
2682 *name = This->strvalues[StringValue_QualifiedName].str;
2683 *len = This->strvalues[StringValue_QualifiedName].len;
2684 return S_OK;
2687 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2688 LPCWSTR *namespaceUri,
2689 UINT *namespaceUri_length)
2691 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2692 return E_NOTIMPL;
2695 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2697 xmlreader *This = impl_from_IXmlReader(iface);
2699 TRACE("(%p)->(%p %p)\n", This, name, len);
2700 *name = This->strvalues[StringValue_LocalName].str;
2701 if (len) *len = This->strvalues[StringValue_LocalName].len;
2702 return S_OK;
2705 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2707 xmlreader *This = impl_from_IXmlReader(iface);
2709 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2710 *prefix = This->strvalues[StringValue_Prefix].str;
2711 if (len) *len = This->strvalues[StringValue_Prefix].len;
2712 return S_OK;
2715 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2717 xmlreader *reader = impl_from_IXmlReader(iface);
2718 strval *val = &reader->strvalues[StringValue_Value];
2720 TRACE("(%p)->(%p %p)\n", reader, value, len);
2722 *value = NULL;
2724 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2726 XmlNodeType type;
2727 HRESULT hr;
2729 hr = IXmlReader_Read(iface, &type);
2730 if (FAILED(hr)) return hr;
2732 /* return if still pending, partially read values are not reported */
2733 if (is_reader_pending(reader)) return E_PENDING;
2736 if (!val->str)
2738 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2739 if (!ptr) return E_OUTOFMEMORY;
2740 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2741 ptr[val->len] = 0;
2742 val->str = ptr;
2745 *value = val->str;
2746 if (len) *len = val->len;
2747 return S_OK;
2750 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2752 xmlreader *reader = impl_from_IXmlReader(iface);
2753 strval *val = &reader->strvalues[StringValue_Value];
2754 UINT len;
2756 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2758 /* Value is already allocated, chunked reads are not possible. */
2759 if (val->str) return S_FALSE;
2761 if (val->len)
2763 len = min(chunk_size, val->len);
2764 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2765 val->start += len;
2766 val->len -= len;
2767 if (read) *read = len;
2770 return S_OK;
2773 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2774 LPCWSTR *baseUri,
2775 UINT *baseUri_length)
2777 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2778 return E_NOTIMPL;
2781 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2783 FIXME("(%p): stub\n", iface);
2784 return FALSE;
2787 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2789 xmlreader *This = impl_from_IXmlReader(iface);
2790 TRACE("(%p)\n", This);
2791 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2792 when current node is start tag of an element */
2793 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2796 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2798 xmlreader *This = impl_from_IXmlReader(iface);
2800 TRACE("(%p %p)\n", This, lineNumber);
2802 if (!lineNumber) return E_INVALIDARG;
2804 *lineNumber = This->line;
2806 return S_OK;
2809 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2811 xmlreader *This = impl_from_IXmlReader(iface);
2813 TRACE("(%p %p)\n", This, linePosition);
2815 if (!linePosition) return E_INVALIDARG;
2817 *linePosition = This->pos;
2819 return S_OK;
2822 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2824 xmlreader *This = impl_from_IXmlReader(iface);
2826 TRACE("(%p)->(%p)\n", This, count);
2828 if (!count) return E_INVALIDARG;
2830 *count = This->attr_count;
2831 return S_OK;
2834 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2836 xmlreader *This = impl_from_IXmlReader(iface);
2837 TRACE("(%p)->(%p)\n", This, depth);
2838 *depth = This->depth;
2839 return S_OK;
2842 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2844 FIXME("(%p): stub\n", iface);
2845 return E_NOTIMPL;
2848 static const struct IXmlReaderVtbl xmlreader_vtbl =
2850 xmlreader_QueryInterface,
2851 xmlreader_AddRef,
2852 xmlreader_Release,
2853 xmlreader_SetInput,
2854 xmlreader_GetProperty,
2855 xmlreader_SetProperty,
2856 xmlreader_Read,
2857 xmlreader_GetNodeType,
2858 xmlreader_MoveToFirstAttribute,
2859 xmlreader_MoveToNextAttribute,
2860 xmlreader_MoveToAttributeByName,
2861 xmlreader_MoveToElement,
2862 xmlreader_GetQualifiedName,
2863 xmlreader_GetNamespaceUri,
2864 xmlreader_GetLocalName,
2865 xmlreader_GetPrefix,
2866 xmlreader_GetValue,
2867 xmlreader_ReadValueChunk,
2868 xmlreader_GetBaseUri,
2869 xmlreader_IsDefault,
2870 xmlreader_IsEmptyElement,
2871 xmlreader_GetLineNumber,
2872 xmlreader_GetLinePosition,
2873 xmlreader_GetAttributeCount,
2874 xmlreader_GetDepth,
2875 xmlreader_IsEOF
2878 /** IXmlReaderInput **/
2879 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2881 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2883 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2885 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2886 IsEqualGUID(riid, &IID_IUnknown))
2888 *ppvObject = iface;
2890 else
2892 WARN("interface %s not implemented\n", debugstr_guid(riid));
2893 *ppvObject = NULL;
2894 return E_NOINTERFACE;
2897 IUnknown_AddRef(iface);
2899 return S_OK;
2902 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2904 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2905 ULONG ref = InterlockedIncrement(&This->ref);
2906 TRACE("(%p)->(%d)\n", This, ref);
2907 return ref;
2910 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2912 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2913 LONG ref = InterlockedDecrement(&This->ref);
2915 TRACE("(%p)->(%d)\n", This, ref);
2917 if (ref == 0)
2919 IMalloc *imalloc = This->imalloc;
2920 if (This->input) IUnknown_Release(This->input);
2921 if (This->stream) ISequentialStream_Release(This->stream);
2922 if (This->buffer) free_input_buffer(This->buffer);
2923 readerinput_free(This, This->baseuri);
2924 readerinput_free(This, This);
2925 if (imalloc) IMalloc_Release(imalloc);
2928 return ref;
2931 static const struct IUnknownVtbl xmlreaderinputvtbl =
2933 xmlreaderinput_QueryInterface,
2934 xmlreaderinput_AddRef,
2935 xmlreaderinput_Release
2938 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2940 xmlreader *reader;
2941 int i;
2943 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2945 if (!IsEqualGUID(riid, &IID_IXmlReader))
2947 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2948 return E_FAIL;
2951 if (imalloc)
2952 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2953 else
2954 reader = heap_alloc(sizeof(*reader));
2955 if(!reader) return E_OUTOFMEMORY;
2957 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2958 reader->ref = 1;
2959 reader->input = NULL;
2960 reader->state = XmlReadState_Closed;
2961 reader->instate = XmlReadInState_Initial;
2962 reader->resumestate = XmlReadResumeState_Initial;
2963 reader->dtdmode = DtdProcessing_Prohibit;
2964 reader->line = reader->pos = 0;
2965 reader->imalloc = imalloc;
2966 if (imalloc) IMalloc_AddRef(imalloc);
2967 reader->nodetype = XmlNodeType_None;
2968 list_init(&reader->attrs);
2969 reader->attr_count = 0;
2970 reader->attr = NULL;
2971 list_init(&reader->elements);
2972 reader->depth = 0;
2973 reader->max_depth = 256;
2974 reader->empty_element = FALSE;
2975 memset(reader->resume, 0, sizeof(reader->resume));
2977 for (i = 0; i < StringValue_Last; i++)
2978 reader->strvalues[i] = strval_empty;
2980 *obj = &reader->IXmlReader_iface;
2982 TRACE("returning iface %p\n", *obj);
2984 return S_OK;
2987 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2988 IMalloc *imalloc,
2989 LPCWSTR encoding,
2990 BOOL hint,
2991 LPCWSTR base_uri,
2992 IXmlReaderInput **ppInput)
2994 xmlreaderinput *readerinput;
2995 HRESULT hr;
2997 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
2998 hint, wine_dbgstr_w(base_uri), ppInput);
3000 if (!stream || !ppInput) return E_INVALIDARG;
3002 if (imalloc)
3003 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3004 else
3005 readerinput = heap_alloc(sizeof(*readerinput));
3006 if(!readerinput) return E_OUTOFMEMORY;
3008 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3009 readerinput->ref = 1;
3010 readerinput->imalloc = imalloc;
3011 readerinput->stream = NULL;
3012 if (imalloc) IMalloc_AddRef(imalloc);
3013 readerinput->encoding = parse_encoding_name(encoding, -1);
3014 readerinput->hint = hint;
3015 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3016 readerinput->pending = 0;
3018 hr = alloc_input_buffer(readerinput);
3019 if (hr != S_OK)
3021 readerinput_free(readerinput, readerinput->baseuri);
3022 readerinput_free(readerinput, readerinput);
3023 if (imalloc) IMalloc_Release(imalloc);
3024 return hr;
3026 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3028 *ppInput = &readerinput->IXmlReaderInput_iface;
3030 TRACE("returning iface %p\n", *ppInput);
3032 return S_OK;