dbghelp: Remove an "#if 1" preprocessor directive.
[wine/multimedia.git] / dlls / xmllite / reader.c
blob9fe48bdf94e63dfc64a9002329fa6c7d8837af2d
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
41 typedef enum
43 XmlEncoding_UTF16,
44 XmlEncoding_UTF8,
45 XmlEncoding_Unknown
46 } xml_encoding;
48 typedef enum
50 XmlReadInState_Initial,
51 XmlReadInState_XmlDecl,
52 XmlReadInState_Misc_DTD,
53 XmlReadInState_DTD,
54 XmlReadInState_DTD_Misc,
55 XmlReadInState_Element,
56 XmlReadInState_Content,
57 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
58 XmlReadInState_Eof
59 } XmlReaderInternalState;
61 /* This state denotes where parsing was interrupted by input problem.
62 Reader resumes parsing using this information. */
63 typedef enum
65 XmlReadResumeState_Initial,
66 XmlReadResumeState_PITarget,
67 XmlReadResumeState_PIBody,
68 XmlReadResumeState_CDATA,
69 XmlReadResumeState_Comment,
70 XmlReadResumeState_STag,
71 XmlReadResumeState_CharData,
72 XmlReadResumeState_Whitespace
73 } XmlReaderResumeState;
75 /* saved pointer index to resume from particular input position */
76 typedef enum
78 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
79 XmlReadResume_Local, /* local for QName */
80 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
81 XmlReadResume_Last
82 } XmlReaderResume;
84 typedef enum
86 StringValue_LocalName,
87 StringValue_Prefix,
88 StringValue_QualifiedName,
89 StringValue_Value,
90 StringValue_Last
91 } XmlReaderStringValue;
93 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
94 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
96 static const WCHAR dblquoteW[] = {'\"',0};
97 static const WCHAR quoteW[] = {'\'',0};
98 static const WCHAR ltW[] = {'<',0};
99 static const WCHAR gtW[] = {'>',0};
100 static const WCHAR commentW[] = {'<','!','-','-',0};
101 static const WCHAR piW[] = {'<','?',0};
103 static const char *debugstr_nodetype(XmlNodeType nodetype)
105 static const char * const type_names[] =
107 "None",
108 "Element",
109 "Attribute",
110 "Text",
111 "CDATA",
114 "ProcessingInstruction",
115 "Comment",
117 "DocumentType",
120 "Whitespace",
122 "EndElement",
124 "XmlDeclaration"
127 if (nodetype > _XmlNodeType_Last)
128 return wine_dbg_sprintf("unknown type=%d", nodetype);
130 return type_names[nodetype];
133 static const char *debugstr_prop(XmlReaderProperty prop)
135 static const char * const prop_names[] =
137 "MultiLanguage",
138 "ConformanceLevel",
139 "RandomAccess",
140 "XmlResolver",
141 "DtdProcessing",
142 "ReadState",
143 "MaxElementDepth",
144 "MaxEntityExpansion"
147 if (prop > _XmlReaderProperty_Last)
148 return wine_dbg_sprintf("unknown property=%d", prop);
150 return prop_names[prop];
153 struct xml_encoding_data
155 const WCHAR *name;
156 xml_encoding enc;
157 UINT cp;
160 static const struct xml_encoding_data xml_encoding_map[] = {
161 { utf16W, XmlEncoding_UTF16, ~0 },
162 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
165 typedef struct
167 char *data;
168 UINT cur;
169 unsigned int allocated;
170 unsigned int written;
171 } encoded_buffer;
173 typedef struct input_buffer input_buffer;
175 typedef struct
177 IXmlReaderInput IXmlReaderInput_iface;
178 LONG ref;
179 /* reference passed on IXmlReaderInput creation, is kept when input is created */
180 IUnknown *input;
181 IMalloc *imalloc;
182 xml_encoding encoding;
183 BOOL hint;
184 WCHAR *baseuri;
185 /* stream reference set after SetInput() call from reader,
186 stored as sequential stream, cause currently
187 optimizations possible with IStream aren't implemented */
188 ISequentialStream *stream;
189 input_buffer *buffer;
190 unsigned int pending : 1;
191 } xmlreaderinput;
193 static const struct IUnknownVtbl xmlreaderinputvtbl;
195 /* Structure to hold parsed string of specific length.
197 Reader stores node value as 'start' pointer, on request
198 a null-terminated version of it is allocated.
200 To init a strval variable use reader_init_strvalue(),
201 to set strval as a reader value use reader_set_strvalue().
203 typedef struct
205 WCHAR *str; /* allocated null-terminated string */
206 UINT len; /* length in WCHARs, altered after ReadValueChunk */
207 UINT start; /* input position where value starts */
208 } strval;
210 static WCHAR emptyW[] = {0};
211 static const strval strval_empty = { emptyW };
213 struct attribute
215 struct list entry;
216 strval localname;
217 strval value;
220 struct element
222 struct list entry;
223 strval qname;
224 strval localname;
227 typedef struct
229 IXmlReader IXmlReader_iface;
230 LONG ref;
231 xmlreaderinput *input;
232 IMalloc *imalloc;
233 XmlReadState state;
234 XmlReaderInternalState instate;
235 XmlReaderResumeState resumestate;
236 XmlNodeType nodetype;
237 DtdProcessing dtdmode;
238 UINT line, pos; /* reader position in XML stream */
239 struct list attrs; /* attributes list for current node */
240 struct attribute *attr; /* current attribute */
241 UINT attr_count;
242 struct list elements;
243 strval strvalues[StringValue_Last];
244 UINT depth;
245 UINT max_depth;
246 BOOL empty_element;
247 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
248 } xmlreader;
250 struct input_buffer
252 encoded_buffer utf16;
253 encoded_buffer encoded;
254 UINT code_page;
255 xmlreaderinput *input;
258 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
260 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
263 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
265 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
268 static inline void *m_alloc(IMalloc *imalloc, size_t len)
270 if (imalloc)
271 return IMalloc_Alloc(imalloc, len);
272 else
273 return heap_alloc(len);
276 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
278 if (imalloc)
279 return IMalloc_Realloc(imalloc, mem, len);
280 else
281 return heap_realloc(mem, len);
284 static inline void m_free(IMalloc *imalloc, void *mem)
286 if (imalloc)
287 IMalloc_Free(imalloc, mem);
288 else
289 heap_free(mem);
292 /* reader memory allocation functions */
293 static inline void *reader_alloc(xmlreader *reader, size_t len)
295 return m_alloc(reader->imalloc, len);
298 static inline void reader_free(xmlreader *reader, void *mem)
300 m_free(reader->imalloc, mem);
303 /* Just return pointer from offset, no attempt to read more. */
304 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
306 encoded_buffer *buffer = &reader->input->buffer->utf16;
307 return (WCHAR*)buffer->data + offset;
310 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
312 return v->str ? v->str : reader_get_ptr2(reader, v->start);
315 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
317 *dest = *src;
319 if (src->str != strval_empty.str)
321 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
322 if (!dest->str) return E_OUTOFMEMORY;
323 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
324 dest->str[dest->len] = 0;
325 dest->start = 0;
328 return S_OK;
331 /* reader input memory allocation functions */
332 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
334 return m_alloc(input->imalloc, len);
337 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
339 return m_realloc(input->imalloc, mem, len);
342 static inline void readerinput_free(xmlreaderinput *input, void *mem)
344 m_free(input->imalloc, mem);
347 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
349 LPWSTR ret = NULL;
351 if(str) {
352 DWORD size;
354 size = (strlenW(str)+1)*sizeof(WCHAR);
355 ret = readerinput_alloc(input, size);
356 if (ret) memcpy(ret, str, size);
359 return ret;
362 static void reader_clear_attrs(xmlreader *reader)
364 struct attribute *attr, *attr2;
365 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
367 reader_free(reader, attr);
369 list_init(&reader->attrs);
370 reader->attr_count = 0;
373 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
374 while we are on a node with attributes */
375 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
377 struct attribute *attr;
379 attr = reader_alloc(reader, sizeof(*attr));
380 if (!attr) return E_OUTOFMEMORY;
382 attr->localname = *localname;
383 attr->value = *value;
384 list_add_tail(&reader->attrs, &attr->entry);
385 reader->attr_count++;
387 return S_OK;
390 /* This one frees stored string value if needed */
391 static void reader_free_strvalued(xmlreader *reader, strval *v)
393 if (v->str != strval_empty.str)
395 reader_free(reader, v->str);
396 *v = strval_empty;
400 /* returns length in WCHARs from 'start' to current buffer offset */
401 static inline UINT reader_get_len(const xmlreader *reader, UINT start)
403 return reader->input->buffer->utf16.cur - start;
406 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
408 v->start = start;
409 v->len = len;
410 v->str = NULL;
413 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
415 return debugstr_wn(reader_get_strptr(reader, v), v->len);
418 /* used to initialize from constant string */
419 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
421 v->start = 0;
422 v->len = len;
423 v->str = str;
426 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
428 reader_free_strvalued(reader, &reader->strvalues[type]);
431 static void reader_free_strvalues(xmlreader *reader)
433 int type;
434 for (type = 0; type < StringValue_Last; type++)
435 reader_free_strvalue(reader, type);
438 /* This helper should only be used to test if strings are the same,
439 it doesn't try to sort. */
440 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
442 if (str1->len != str2->len) return 0;
443 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
446 static void reader_clear_elements(xmlreader *reader)
448 struct element *elem, *elem2;
449 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
451 reader_free_strvalued(reader, &elem->qname);
452 reader_free(reader, elem);
454 list_init(&reader->elements);
455 reader->empty_element = FALSE;
458 static HRESULT reader_inc_depth(xmlreader *reader)
460 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
461 return S_OK;
464 static void reader_dec_depth(xmlreader *reader)
466 if (reader->depth > 1) reader->depth--;
469 static HRESULT reader_push_element(xmlreader *reader, strval *qname, strval *localname)
471 struct element *elem;
472 HRESULT hr;
474 elem = reader_alloc(reader, sizeof(*elem));
475 if (!elem) return E_OUTOFMEMORY;
477 hr = reader_strvaldup(reader, qname, &elem->qname);
478 if (FAILED(hr)) {
479 reader_free(reader, elem);
480 return hr;
483 hr = reader_strvaldup(reader, localname, &elem->localname);
484 if (FAILED(hr))
486 reader_free_strvalued(reader, &elem->qname);
487 reader_free(reader, elem);
488 return hr;
491 if (!list_empty(&reader->elements))
493 hr = reader_inc_depth(reader);
494 if (FAILED(hr)) {
495 reader_free(reader, elem);
496 return hr;
500 list_add_head(&reader->elements, &elem->entry);
501 reader->empty_element = FALSE;
502 return hr;
505 static void reader_pop_element(xmlreader *reader)
507 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
509 if (elem)
511 list_remove(&elem->entry);
512 reader_free_strvalued(reader, &elem->qname);
513 reader_free_strvalued(reader, &elem->localname);
514 reader_free(reader, elem);
515 reader_dec_depth(reader);
519 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
520 means node value is to be determined. */
521 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
523 strval *v = &reader->strvalues[type];
525 reader_free_strvalue(reader, type);
526 if (!value)
528 v->str = NULL;
529 v->start = 0;
530 v->len = 0;
531 return;
534 if (value->str == strval_empty.str)
535 *v = *value;
536 else
538 if (type == StringValue_Value)
540 /* defer allocation for value string */
541 v->str = NULL;
542 v->start = value->start;
543 v->len = value->len;
545 else
547 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
548 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
549 v->str[value->len] = 0;
550 v->len = value->len;
555 static inline int is_reader_pending(xmlreader *reader)
557 return reader->input->pending;
560 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
562 const int initial_len = 0x2000;
563 buffer->data = readerinput_alloc(input, initial_len);
564 if (!buffer->data) return E_OUTOFMEMORY;
566 memset(buffer->data, 0, 4);
567 buffer->cur = 0;
568 buffer->allocated = initial_len;
569 buffer->written = 0;
571 return S_OK;
574 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
576 readerinput_free(input, buffer->data);
579 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
581 if (encoding == XmlEncoding_Unknown)
583 FIXME("unsupported encoding %d\n", encoding);
584 return E_NOTIMPL;
587 *cp = xml_encoding_map[encoding].cp;
589 return S_OK;
592 static xml_encoding parse_encoding_name(const WCHAR *name, int len)
594 int min, max, n, c;
596 if (!name) return XmlEncoding_Unknown;
598 min = 0;
599 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
601 while (min <= max)
603 n = (min+max)/2;
605 if (len != -1)
606 c = strncmpiW(xml_encoding_map[n].name, name, len);
607 else
608 c = strcmpiW(xml_encoding_map[n].name, name);
609 if (!c)
610 return xml_encoding_map[n].enc;
612 if (c > 0)
613 max = n-1;
614 else
615 min = n+1;
618 return XmlEncoding_Unknown;
621 static HRESULT alloc_input_buffer(xmlreaderinput *input)
623 input_buffer *buffer;
624 HRESULT hr;
626 input->buffer = NULL;
628 buffer = readerinput_alloc(input, sizeof(*buffer));
629 if (!buffer) return E_OUTOFMEMORY;
631 buffer->input = input;
632 buffer->code_page = ~0; /* code page is unknown at this point */
633 hr = init_encoded_buffer(input, &buffer->utf16);
634 if (hr != S_OK) {
635 readerinput_free(input, buffer);
636 return hr;
639 hr = init_encoded_buffer(input, &buffer->encoded);
640 if (hr != S_OK) {
641 free_encoded_buffer(input, &buffer->utf16);
642 readerinput_free(input, buffer);
643 return hr;
646 input->buffer = buffer;
647 return S_OK;
650 static void free_input_buffer(input_buffer *buffer)
652 free_encoded_buffer(buffer->input, &buffer->encoded);
653 free_encoded_buffer(buffer->input, &buffer->utf16);
654 readerinput_free(buffer->input, buffer);
657 static void readerinput_release_stream(xmlreaderinput *readerinput)
659 if (readerinput->stream) {
660 ISequentialStream_Release(readerinput->stream);
661 readerinput->stream = NULL;
665 /* Queries already stored interface for IStream/ISequentialStream.
666 Interface supplied on creation will be overwritten */
667 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
669 HRESULT hr;
671 readerinput_release_stream(readerinput);
672 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
673 if (hr != S_OK)
674 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
676 return hr;
679 /* reads a chunk to raw buffer */
680 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
682 encoded_buffer *buffer = &readerinput->buffer->encoded;
683 /* to make sure aligned length won't exceed allocated length */
684 ULONG len = buffer->allocated - buffer->written - 4;
685 ULONG read;
686 HRESULT hr;
688 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
689 variable width encodings like UTF-8 */
690 len = (len + 3) & ~3;
691 /* try to use allocated space or grow */
692 if (buffer->allocated - buffer->written < len)
694 buffer->allocated *= 2;
695 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
696 len = buffer->allocated - buffer->written;
699 read = 0;
700 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
701 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
702 readerinput->pending = hr == E_PENDING;
703 if (FAILED(hr)) return hr;
704 buffer->written += read;
706 return hr;
709 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
710 static void readerinput_grow(xmlreaderinput *readerinput, int length)
712 encoded_buffer *buffer = &readerinput->buffer->utf16;
714 length *= sizeof(WCHAR);
715 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
716 if (buffer->allocated < buffer->written + length + 4)
718 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
719 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
720 buffer->allocated = grown_size;
724 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
726 static const char startA[] = {'<','?'};
727 static const char commentA[] = {'<','!'};
728 encoded_buffer *buffer = &readerinput->buffer->encoded;
729 unsigned char *ptr = (unsigned char*)buffer->data;
731 return !memcmp(buffer->data, startA, sizeof(startA)) ||
732 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
733 /* test start byte */
734 (ptr[0] == '<' &&
736 (ptr[1] && (ptr[1] <= 0x7f)) ||
737 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
738 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
739 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
743 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
745 encoded_buffer *buffer = &readerinput->buffer->encoded;
746 static const WCHAR startW[] = {'<','?'};
747 static const WCHAR commentW[] = {'<','!'};
748 static const char utf8bom[] = {0xef,0xbb,0xbf};
749 static const char utf16lebom[] = {0xff,0xfe};
751 *enc = XmlEncoding_Unknown;
753 if (buffer->written <= 3)
755 HRESULT hr = readerinput_growraw(readerinput);
756 if (FAILED(hr)) return hr;
757 if (buffer->written <= 3) return MX_E_INPUTEND;
760 /* try start symbols if we have enough data to do that, input buffer should contain
761 first chunk already */
762 if (readerinput_is_utf8(readerinput))
763 *enc = XmlEncoding_UTF8;
764 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
765 !memcmp(buffer->data, commentW, sizeof(commentW)))
766 *enc = XmlEncoding_UTF16;
767 /* try with BOM now */
768 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
770 buffer->cur += sizeof(utf8bom);
771 *enc = XmlEncoding_UTF8;
773 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
775 buffer->cur += sizeof(utf16lebom);
776 *enc = XmlEncoding_UTF16;
779 return S_OK;
782 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
784 encoded_buffer *buffer = &readerinput->buffer->encoded;
785 int len = buffer->written;
787 /* complete single byte char */
788 if (!(buffer->data[len-1] & 0x80)) return len;
790 /* find start byte of multibyte char */
791 while (--len && !(buffer->data[len] & 0xc0))
794 return len;
797 /* Returns byte length of complete char sequence for buffer code page,
798 it's relative to current buffer position which is currently used for BOM handling
799 only. */
800 static int readerinput_get_convlen(xmlreaderinput *readerinput)
802 encoded_buffer *buffer = &readerinput->buffer->encoded;
803 int len;
805 if (readerinput->buffer->code_page == CP_UTF8)
806 len = readerinput_get_utf8_convlen(readerinput);
807 else
808 len = buffer->written;
810 TRACE("%d\n", len - buffer->cur);
811 return len - buffer->cur;
814 /* It's possible that raw buffer has some leftovers from last conversion - some char
815 sequence that doesn't represent a full code point. Length argument should be calculated with
816 readerinput_get_convlen(), if it's -1 it will be calculated here. */
817 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
819 encoded_buffer *buffer = &readerinput->buffer->encoded;
821 if (len == -1)
822 len = readerinput_get_convlen(readerinput);
824 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
825 /* everything below cur is lost too */
826 buffer->written -= len + buffer->cur;
827 /* after this point we don't need cur offset really,
828 it's used only to mark where actual data begins when first chunk is read */
829 buffer->cur = 0;
832 /* note that raw buffer content is kept */
833 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
835 encoded_buffer *src = &readerinput->buffer->encoded;
836 encoded_buffer *dest = &readerinput->buffer->utf16;
837 int len, dest_len;
838 HRESULT hr;
839 WCHAR *ptr;
840 UINT cp;
842 hr = get_code_page(enc, &cp);
843 if (FAILED(hr)) return;
845 readerinput->buffer->code_page = cp;
846 len = readerinput_get_convlen(readerinput);
848 TRACE("switching to cp %d\n", cp);
850 /* just copy in this case */
851 if (enc == XmlEncoding_UTF16)
853 readerinput_grow(readerinput, len);
854 memcpy(dest->data, src->data + src->cur, len);
855 dest->written += len*sizeof(WCHAR);
856 return;
859 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
860 readerinput_grow(readerinput, dest_len);
861 ptr = (WCHAR*)dest->data;
862 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
863 ptr[dest_len] = 0;
864 dest->written += dest_len*sizeof(WCHAR);
867 /* shrinks parsed data a buffer begins with */
868 static void reader_shrink(xmlreader *reader)
870 encoded_buffer *buffer = &reader->input->buffer->utf16;
872 /* avoid to move too often using threshold shrink length */
873 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
875 buffer->written -= buffer->cur*sizeof(WCHAR);
876 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
877 buffer->cur = 0;
878 *(WCHAR*)&buffer->data[buffer->written] = 0;
882 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
883 It won't attempt to shrink but will grow destination buffer if needed */
884 static HRESULT reader_more(xmlreader *reader)
886 xmlreaderinput *readerinput = reader->input;
887 encoded_buffer *src = &readerinput->buffer->encoded;
888 encoded_buffer *dest = &readerinput->buffer->utf16;
889 UINT cp = readerinput->buffer->code_page;
890 int len, dest_len;
891 HRESULT hr;
892 WCHAR *ptr;
894 /* get some raw data from stream first */
895 hr = readerinput_growraw(readerinput);
896 len = readerinput_get_convlen(readerinput);
898 /* just copy for UTF-16 case */
899 if (cp == ~0)
901 readerinput_grow(readerinput, len);
902 memcpy(dest->data + dest->written, src->data + src->cur, len);
903 dest->written += len*sizeof(WCHAR);
904 return hr;
907 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
908 readerinput_grow(readerinput, dest_len);
909 ptr = (WCHAR*)(dest->data + dest->written);
910 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
911 ptr[dest_len] = 0;
912 dest->written += dest_len*sizeof(WCHAR);
913 /* get rid of processed data */
914 readerinput_shrinkraw(readerinput, len);
916 return hr;
919 static inline UINT reader_get_cur(xmlreader *reader)
921 return reader->input->buffer->utf16.cur;
924 static inline WCHAR *reader_get_ptr(xmlreader *reader)
926 encoded_buffer *buffer = &reader->input->buffer->utf16;
927 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
928 if (!*ptr) reader_more(reader);
929 return (WCHAR*)buffer->data + buffer->cur;
932 static int reader_cmp(xmlreader *reader, const WCHAR *str)
934 const WCHAR *ptr = reader_get_ptr(reader);
935 return strncmpW(str, ptr, strlenW(str));
938 /* moves cursor n WCHARs forward */
939 static void reader_skipn(xmlreader *reader, int n)
941 encoded_buffer *buffer = &reader->input->buffer->utf16;
942 const WCHAR *ptr = reader_get_ptr(reader);
944 while (*ptr++ && n--)
946 buffer->cur++;
947 reader->pos++;
951 static inline BOOL is_wchar_space(WCHAR ch)
953 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
956 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
957 static int reader_skipspaces(xmlreader *reader)
959 encoded_buffer *buffer = &reader->input->buffer->utf16;
960 const WCHAR *ptr = reader_get_ptr(reader);
961 UINT start = reader_get_cur(reader);
963 while (is_wchar_space(*ptr))
965 if (*ptr == '\r')
966 reader->pos = 0;
967 else if (*ptr == '\n')
969 reader->line++;
970 reader->pos = 0;
972 else
973 reader->pos++;
975 buffer->cur++;
976 ptr = reader_get_ptr(reader);
979 return reader_get_cur(reader) - start;
982 /* [26] VersionNum ::= '1.' [0-9]+ */
983 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
985 static const WCHAR onedotW[] = {'1','.',0};
986 WCHAR *ptr, *ptr2;
987 UINT start;
989 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
991 start = reader_get_cur(reader);
992 /* skip "1." */
993 reader_skipn(reader, 2);
995 ptr2 = ptr = reader_get_ptr(reader);
996 while (*ptr >= '0' && *ptr <= '9')
998 reader_skipn(reader, 1);
999 ptr = reader_get_ptr(reader);
1002 if (ptr2 == ptr) return WC_E_DIGIT;
1003 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1004 TRACE("version=%s\n", debug_strval(reader, val));
1005 return S_OK;
1008 /* [25] Eq ::= S? '=' S? */
1009 static HRESULT reader_parse_eq(xmlreader *reader)
1011 static const WCHAR eqW[] = {'=',0};
1012 reader_skipspaces(reader);
1013 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1014 /* skip '=' */
1015 reader_skipn(reader, 1);
1016 reader_skipspaces(reader);
1017 return S_OK;
1020 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1021 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1023 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1024 strval val, name;
1025 HRESULT hr;
1027 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1029 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1030 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1031 /* skip 'version' */
1032 reader_skipn(reader, 7);
1034 hr = reader_parse_eq(reader);
1035 if (FAILED(hr)) return hr;
1037 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1038 return WC_E_QUOTE;
1039 /* skip "'"|'"' */
1040 reader_skipn(reader, 1);
1042 hr = reader_parse_versionnum(reader, &val);
1043 if (FAILED(hr)) return hr;
1045 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1046 return WC_E_QUOTE;
1048 /* skip "'"|'"' */
1049 reader_skipn(reader, 1);
1051 return reader_add_attr(reader, &name, &val);
1054 /* ([A-Za-z0-9._] | '-') */
1055 static inline BOOL is_wchar_encname(WCHAR ch)
1057 return ((ch >= 'A' && ch <= 'Z') ||
1058 (ch >= 'a' && ch <= 'z') ||
1059 (ch >= '0' && ch <= '9') ||
1060 (ch == '.') || (ch == '_') ||
1061 (ch == '-'));
1064 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1065 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1067 WCHAR *start = reader_get_ptr(reader), *ptr;
1068 xml_encoding enc;
1069 int len;
1071 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1072 return WC_E_ENCNAME;
1074 ptr = start;
1075 while (is_wchar_encname(*++ptr))
1078 len = ptr - start;
1079 enc = parse_encoding_name(start, len);
1080 TRACE("encoding name %s\n", debugstr_wn(start, len));
1081 val->str = start;
1082 val->len = len;
1084 if (enc == XmlEncoding_Unknown)
1085 return WC_E_ENCNAME;
1087 /* skip encoding name */
1088 reader_skipn(reader, len);
1089 return S_OK;
1092 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1093 static HRESULT reader_parse_encdecl(xmlreader *reader)
1095 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1096 strval name, val;
1097 HRESULT hr;
1099 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1101 if (reader_cmp(reader, encodingW)) return S_FALSE;
1102 name.str = reader_get_ptr(reader);
1103 name.len = 8;
1104 /* skip 'encoding' */
1105 reader_skipn(reader, 8);
1107 hr = reader_parse_eq(reader);
1108 if (FAILED(hr)) return hr;
1110 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1111 return WC_E_QUOTE;
1112 /* skip "'"|'"' */
1113 reader_skipn(reader, 1);
1115 hr = reader_parse_encname(reader, &val);
1116 if (FAILED(hr)) return hr;
1118 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1119 return WC_E_QUOTE;
1121 /* skip "'"|'"' */
1122 reader_skipn(reader, 1);
1124 return reader_add_attr(reader, &name, &val);
1127 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1128 static HRESULT reader_parse_sddecl(xmlreader *reader)
1130 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1131 static const WCHAR yesW[] = {'y','e','s',0};
1132 static const WCHAR noW[] = {'n','o',0};
1133 strval name, val;
1134 UINT start;
1135 HRESULT hr;
1137 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1139 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1140 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1141 /* skip 'standalone' */
1142 reader_skipn(reader, 10);
1144 hr = reader_parse_eq(reader);
1145 if (FAILED(hr)) return hr;
1147 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1148 return WC_E_QUOTE;
1149 /* skip "'"|'"' */
1150 reader_skipn(reader, 1);
1152 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1153 return WC_E_XMLDECL;
1155 start = reader_get_cur(reader);
1156 /* skip 'yes'|'no' */
1157 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1158 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1159 TRACE("standalone=%s\n", debug_strval(reader, &val));
1161 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1162 return WC_E_QUOTE;
1163 /* skip "'"|'"' */
1164 reader_skipn(reader, 1);
1166 return reader_add_attr(reader, &name, &val);
1169 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1170 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1172 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1173 static const WCHAR declcloseW[] = {'?','>',0};
1174 HRESULT hr;
1176 /* check if we have "<?xml " */
1177 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1179 reader_skipn(reader, 5);
1180 hr = reader_parse_versioninfo(reader);
1181 if (FAILED(hr))
1182 return hr;
1184 hr = reader_parse_encdecl(reader);
1185 if (FAILED(hr))
1186 return hr;
1188 hr = reader_parse_sddecl(reader);
1189 if (FAILED(hr))
1190 return hr;
1192 reader_skipspaces(reader);
1193 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1194 reader_skipn(reader, 2);
1196 reader_inc_depth(reader);
1197 reader->nodetype = XmlNodeType_XmlDeclaration;
1198 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1199 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1200 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1202 return S_OK;
1205 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1206 static HRESULT reader_parse_comment(xmlreader *reader)
1208 WCHAR *ptr;
1209 UINT start;
1211 if (reader->resumestate == XmlReadResumeState_Comment)
1213 start = reader->resume[XmlReadResume_Body];
1214 ptr = reader_get_ptr(reader);
1216 else
1218 /* skip '<!--' */
1219 reader_skipn(reader, 4);
1220 reader_shrink(reader);
1221 ptr = reader_get_ptr(reader);
1222 start = reader_get_cur(reader);
1223 reader->nodetype = XmlNodeType_Comment;
1224 reader->resume[XmlReadResume_Body] = start;
1225 reader->resumestate = XmlReadResumeState_Comment;
1226 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1227 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1228 reader_set_strvalue(reader, StringValue_Value, NULL);
1231 /* will exit when there's no more data, it won't attempt to
1232 read more from stream */
1233 while (*ptr)
1235 if (ptr[0] == '-')
1237 if (ptr[1] == '-')
1239 if (ptr[2] == '>')
1241 strval value;
1243 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1244 TRACE("%s\n", debug_strval(reader, &value));
1246 /* skip rest of markup '->' */
1247 reader_skipn(reader, 3);
1249 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1250 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1251 reader_set_strvalue(reader, StringValue_Value, &value);
1252 reader->resume[XmlReadResume_Body] = 0;
1253 reader->resumestate = XmlReadResumeState_Initial;
1254 return S_OK;
1256 else
1257 return WC_E_COMMENT;
1261 reader_skipn(reader, 1);
1262 ptr++;
1265 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 *
 * Works on single UTF-16 code units: both surrogate halves are accepted, which
 * makes the accepted range above the control characters one contiguous block.
 */
static inline BOOL is_char(WCHAR ch)
{
    /* the only control characters allowed are tab, line feed and carriage return */
    if (ch < 0x20)
        return ch == '\t' || ch == '\n' || ch == '\r';

    /* 0x20..0xd7ff, surrogates 0xd800..0xdfff and 0xe000..0xfffd are adjacent */
    return ch <= 0xfffd;
}

/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns non-zero when ch may appear inside a public identifier literal.
 * The punctuation run starts at the apostrophe (0x27), not at the hyphen, so
 * that ' ( ) * + , are accepted as the production requires.
 */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./ digits :; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}

/* Tests a single UTF-16 code unit against the NameStartChar production of
 * XML 1.0; both surrogate halves are accepted so that characters outside the
 * BMP pass unit by unit. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    /* ASCII: letters, colon and underscore only */
    if (ch < 0x80)
        return ch == ':' || ch == '_' ||
               (ch >= 'A' && ch <= 'Z') ||
               (ch >= 'a' && ch <= 'z');

    return (ch >= 0xc0 && ch <= 0x2ff && ch != 0xd7 && ch != 0xf7) ||
           (ch >= 0x370 && ch <= 0x1fff && ch != 0x37e) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xdfff) || /* includes high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}

/* [4 NS] NCName ::= Name - (Char* ':' Char*)
 *
 * Tests one UTF-16 code unit: everything a name character may be except the
 * colon. Both surrogate halves are accepted.
 */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    /* ASCII: letters, digits, underscore, hyphen and full stop */
    if (ch < 0x80)
        return ch == '_' || ch == '-' || ch == '.' ||
               (ch >= 'A' && ch <= 'Z') ||
               (ch >= 'a' && ch <= 'z') ||
               (ch >= '0' && ch <= '9');

    return (ch == 0xb7) ||
           /* 0xc0..0x1fff is contiguous apart from three excluded code points */
           (ch >= 0xc0 && ch <= 0x1fff && ch != 0xd7 && ch != 0xf7 && ch != 0x37e) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xdfff) || /* includes high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}

1336 static inline BOOL is_namechar(WCHAR ch)
1338 return (ch == ':') || is_ncnamechar(ch);
1341 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1343 /* When we're on attribute always return attribute type, container node type is kept.
1344 Note that container is not necessarily an element, and attribute doesn't mean it's
1345 an attribute in XML spec terms. */
1346 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1349 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1350 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1351 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1352 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1353 [5] Name ::= NameStartChar (NameChar)* */
1354 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1356 WCHAR *ptr;
1357 UINT start;
1359 if (reader->resume[XmlReadResume_Name])
1361 start = reader->resume[XmlReadResume_Name];
1362 ptr = reader_get_ptr(reader);
1364 else
1366 ptr = reader_get_ptr(reader);
1367 start = reader_get_cur(reader);
1368 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1371 while (is_namechar(*ptr))
1373 reader_skipn(reader, 1);
1374 ptr = reader_get_ptr(reader);
1377 if (is_reader_pending(reader))
1379 reader->resume[XmlReadResume_Name] = start;
1380 return E_PENDING;
1382 else
1383 reader->resume[XmlReadResume_Name] = 0;
1385 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1386 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1388 return S_OK;
1391 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1392 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1394 static const WCHAR xmlW[] = {'x','m','l'};
1395 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1396 strval name;
1397 WCHAR *ptr;
1398 HRESULT hr;
1399 UINT i;
1401 hr = reader_parse_name(reader, &name);
1402 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1404 /* now that we got name check for illegal content */
1405 if (strval_eq(reader, &name, &xmlval))
1406 return WC_E_LEADINGXML;
1408 /* PITarget can't be a qualified name */
1409 ptr = reader_get_strptr(reader, &name);
1410 for (i = 0; i < name.len; i++)
1411 if (ptr[i] == ':')
1412 return i ? NC_E_NAMECOLON : WC_E_PI;
1414 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1415 *target = name;
1416 return S_OK;
1419 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1420 static HRESULT reader_parse_pi(xmlreader *reader)
1422 strval target;
1423 WCHAR *ptr;
1424 UINT start;
1425 HRESULT hr;
1427 switch (reader->resumestate)
1429 case XmlReadResumeState_Initial:
1430 /* skip '<?' */
1431 reader_skipn(reader, 2);
1432 reader_shrink(reader);
1433 reader->resumestate = XmlReadResumeState_PITarget;
1434 case XmlReadResumeState_PITarget:
1435 hr = reader_parse_pitarget(reader, &target);
1436 if (FAILED(hr)) return hr;
1437 reader_set_strvalue(reader, StringValue_LocalName, &target);
1438 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1439 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1440 reader->resumestate = XmlReadResumeState_PIBody;
1441 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1442 default:
1446 start = reader->resume[XmlReadResume_Body];
1447 ptr = reader_get_ptr(reader);
1448 while (*ptr)
1450 if (ptr[0] == '?')
1452 if (ptr[1] == '>')
1454 UINT cur = reader_get_cur(reader);
1455 strval value;
1457 /* strip all leading whitespace chars */
1458 while (start < cur)
1460 ptr = reader_get_ptr2(reader, start);
1461 if (!is_wchar_space(*ptr)) break;
1462 start++;
1465 reader_init_strvalue(start, cur-start, &value);
1467 /* skip '?>' */
1468 reader_skipn(reader, 2);
1469 TRACE("%s\n", debug_strval(reader, &value));
1470 reader->nodetype = XmlNodeType_ProcessingInstruction;
1471 reader->resumestate = XmlReadResumeState_Initial;
1472 reader->resume[XmlReadResume_Body] = 0;
1473 reader_set_strvalue(reader, StringValue_Value, &value);
1474 return S_OK;
1478 reader_skipn(reader, 1);
1479 ptr = reader_get_ptr(reader);
1482 return S_OK;
1485 /* This one is used to parse significant whitespace nodes, like in Misc production */
1486 static HRESULT reader_parse_whitespace(xmlreader *reader)
1488 switch (reader->resumestate)
1490 case XmlReadResumeState_Initial:
1491 reader_shrink(reader);
1492 reader->resumestate = XmlReadResumeState_Whitespace;
1493 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1494 reader->nodetype = XmlNodeType_Whitespace;
1495 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1496 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1497 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1498 /* fallthrough */
1499 case XmlReadResumeState_Whitespace:
1501 strval value;
1502 UINT start;
1504 reader_skipspaces(reader);
1505 if (is_reader_pending(reader)) return S_OK;
1507 start = reader->resume[XmlReadResume_Body];
1508 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1509 reader_set_strvalue(reader, StringValue_Value, &value);
1510 TRACE("%s\n", debug_strval(reader, &value));
1511 reader->resumestate = XmlReadResumeState_Initial;
1513 default:
1517 return S_OK;
1520 /* [27] Misc ::= Comment | PI | S */
1521 static HRESULT reader_parse_misc(xmlreader *reader)
1523 HRESULT hr = S_FALSE;
1525 if (reader->resumestate != XmlReadResumeState_Initial)
1527 hr = reader_more(reader);
1528 if (FAILED(hr)) return hr;
1530 /* finish current node */
1531 switch (reader->resumestate)
1533 case XmlReadResumeState_PITarget:
1534 case XmlReadResumeState_PIBody:
1535 return reader_parse_pi(reader);
1536 case XmlReadResumeState_Comment:
1537 return reader_parse_comment(reader);
1538 case XmlReadResumeState_Whitespace:
1539 return reader_parse_whitespace(reader);
1540 default:
1541 ERR("unknown resume state %d\n", reader->resumestate);
1545 while (1)
1547 const WCHAR *cur = reader_get_ptr(reader);
1549 if (is_wchar_space(*cur))
1550 hr = reader_parse_whitespace(reader);
1551 else if (!reader_cmp(reader, commentW))
1552 hr = reader_parse_comment(reader);
1553 else if (!reader_cmp(reader, piW))
1554 hr = reader_parse_pi(reader);
1555 else
1556 break;
1558 if (hr != S_FALSE) return hr;
1561 return hr;
1564 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1565 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1567 WCHAR *cur = reader_get_ptr(reader), quote;
1568 UINT start;
1570 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1572 quote = *cur;
1573 reader_skipn(reader, 1);
1575 cur = reader_get_ptr(reader);
1576 start = reader_get_cur(reader);
1577 while (is_char(*cur) && *cur != quote)
1579 reader_skipn(reader, 1);
1580 cur = reader_get_ptr(reader);
1582 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1583 if (*cur == quote) reader_skipn(reader, 1);
1585 TRACE("%s\n", debug_strval(reader, literal));
1586 return S_OK;
1589 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1590 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1591 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1593 WCHAR *cur = reader_get_ptr(reader), quote;
1594 UINT start;
1596 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1598 quote = *cur;
1599 reader_skipn(reader, 1);
1601 start = reader_get_cur(reader);
1602 cur = reader_get_ptr(reader);
1603 while (is_pubchar(*cur) && *cur != quote)
1605 reader_skipn(reader, 1);
1606 cur = reader_get_ptr(reader);
1609 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1610 TRACE("%s\n", debug_strval(reader, literal));
1611 return S_OK;
1614 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1615 static HRESULT reader_parse_externalid(xmlreader *reader)
1617 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1618 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1619 strval name;
1620 HRESULT hr;
1621 int cnt;
1623 if (reader_cmp(reader, systemW))
1625 if (reader_cmp(reader, publicW))
1626 return S_FALSE;
1627 else
1629 strval pub;
1631 /* public id */
1632 reader_skipn(reader, 6);
1633 cnt = reader_skipspaces(reader);
1634 if (!cnt) return WC_E_WHITESPACE;
1636 hr = reader_parse_pub_literal(reader, &pub);
1637 if (FAILED(hr)) return hr;
1639 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1640 return reader_add_attr(reader, &name, &pub);
1643 else
1645 strval sys;
1647 /* system id */
1648 reader_skipn(reader, 6);
1649 cnt = reader_skipspaces(reader);
1650 if (!cnt) return WC_E_WHITESPACE;
1652 hr = reader_parse_sys_literal(reader, &sys);
1653 if (FAILED(hr)) return hr;
1655 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1656 return reader_add_attr(reader, &name, &sys);
1659 return hr;
1662 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1663 static HRESULT reader_parse_dtd(xmlreader *reader)
1665 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1666 strval name;
1667 WCHAR *cur;
1668 HRESULT hr;
1670 /* check if we have "<!DOCTYPE" */
1671 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1672 reader_shrink(reader);
1674 /* DTD processing is not allowed by default */
1675 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1677 reader_skipn(reader, 9);
1678 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1680 /* name */
1681 hr = reader_parse_name(reader, &name);
1682 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1684 reader_skipspaces(reader);
1686 hr = reader_parse_externalid(reader);
1687 if (FAILED(hr)) return hr;
1689 reader_skipspaces(reader);
1691 cur = reader_get_ptr(reader);
1692 if (*cur != '>')
1694 FIXME("internal subset parsing not implemented\n");
1695 return E_NOTIMPL;
1698 /* skip '>' */
1699 reader_skipn(reader, 1);
1701 reader->nodetype = XmlNodeType_DocumentType;
1702 reader_set_strvalue(reader, StringValue_LocalName, &name);
1703 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1705 return S_OK;
1708 /* [11 NS] LocalPart ::= NCName */
1709 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1711 WCHAR *ptr;
1712 UINT start;
1714 if (reader->resume[XmlReadResume_Local])
1716 start = reader->resume[XmlReadResume_Local];
1717 ptr = reader_get_ptr(reader);
1719 else
1721 ptr = reader_get_ptr(reader);
1722 start = reader_get_cur(reader);
1725 while (is_ncnamechar(*ptr))
1727 reader_skipn(reader, 1);
1728 ptr = reader_get_ptr(reader);
1731 if (is_reader_pending(reader))
1733 reader->resume[XmlReadResume_Local] = start;
1734 return E_PENDING;
1736 else
1737 reader->resume[XmlReadResume_Local] = 0;
1739 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1741 return S_OK;
1744 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1745 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1746 [9 NS] UnprefixedName ::= LocalPart
1747 [10 NS] Prefix ::= NCName */
1748 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1750 WCHAR *ptr;
1751 UINT start;
1752 HRESULT hr;
1754 if (reader->resume[XmlReadResume_Name])
1756 start = reader->resume[XmlReadResume_Name];
1757 ptr = reader_get_ptr(reader);
1759 else
1761 ptr = reader_get_ptr(reader);
1762 start = reader_get_cur(reader);
1763 reader->resume[XmlReadResume_Name] = start;
1764 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1767 if (reader->resume[XmlReadResume_Local])
1769 hr = reader_parse_local(reader, local);
1770 if (FAILED(hr)) return hr;
1772 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1773 local->start - reader->resume[XmlReadResume_Name] - 1,
1774 prefix);
1776 else
1778 /* skip prefix part */
1779 while (is_ncnamechar(*ptr))
1781 reader_skipn(reader, 1);
1782 ptr = reader_get_ptr(reader);
1785 if (is_reader_pending(reader)) return E_PENDING;
1787 /* got a qualified name */
1788 if (*ptr == ':')
1790 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1792 /* skip ':' */
1793 reader_skipn(reader, 1);
1794 hr = reader_parse_local(reader, local);
1795 if (FAILED(hr)) return hr;
1797 else
1799 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1800 reader_init_strvalue(0, 0, prefix);
1804 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1806 if (prefix->len)
1807 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1808 else
1809 TRACE("ncname %s\n", debug_strval(reader, local));
1811 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1812 /* count ':' too */
1813 (prefix->len ? prefix->len + 1 : 0) + local->len,
1814 qname);
1816 reader->resume[XmlReadResume_Name] = 0;
1817 reader->resume[XmlReadResume_Local] = 0;
1819 return S_OK;
1822 /* Applies normalization rules to a single char, used for attribute values.
1824 Rules include 2 steps:
1826 1) replacing \r\n with a single \n;
1827 2) replacing all whitespace chars with ' '.
1830 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1832 encoded_buffer *buffer = &reader->input->buffer->utf16;
1834 if (!is_wchar_space(*ptr)) return;
1836 if (*ptr == '\r' && *(ptr+1) == '\n')
1838 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1839 memmove(ptr+1, ptr+2, len);
1841 *ptr = ' ';
1844 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1846 static const WCHAR entltW[] = {'l','t'};
1847 static const WCHAR entgtW[] = {'g','t'};
1848 static const WCHAR entampW[] = {'a','m','p'};
1849 static const WCHAR entaposW[] = {'a','p','o','s'};
1850 static const WCHAR entquotW[] = {'q','u','o','t'};
1851 static const strval lt = { (WCHAR*)entltW, 2 };
1852 static const strval gt = { (WCHAR*)entgtW, 2 };
1853 static const strval amp = { (WCHAR*)entampW, 3 };
1854 static const strval apos = { (WCHAR*)entaposW, 4 };
1855 static const strval quot = { (WCHAR*)entquotW, 4 };
1856 WCHAR *str = reader_get_strptr(reader, name);
1858 switch (*str)
1860 case 'l':
1861 if (strval_eq(reader, name, &lt)) return '<';
1862 break;
1863 case 'g':
1864 if (strval_eq(reader, name, &gt)) return '>';
1865 break;
1866 case 'a':
1867 if (strval_eq(reader, name, &amp))
1868 return '&';
1869 else if (strval_eq(reader, name, &apos))
1870 return '\'';
1871 break;
1872 case 'q':
1873 if (strval_eq(reader, name, &quot)) return '\"';
1874 break;
1875 default:
1879 return 0;
1882 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1883 [67] Reference ::= EntityRef | CharRef
1884 [68] EntityRef ::= '&' Name ';' */
/* Parses one reference at the current position; the first character is skipped
   as '&' without being checked.

   On success the reference text is collapsed in place in the UTF-16 buffer:
   the data after ';' is moved right behind the position where '&' was, that
   position is overwritten with the resolved character, and buffer->cur is set
   to cur + 1, with cur sampled at '&' before anything was skipped.
   NOTE(review): buffer->written is not adjusted here - confirm that is intended.

   Returns WC_E_HEXDIGIT/WC_E_DIGIT or WC_E_SEMICOLON for a malformed character
   reference, WC_E_XMLCHARACTER when the value is rejected by is_char(), a
   failure from reader_parse_name() or WC_E_SEMICOLON for a malformed entity
   reference, and WC_E_UNDECLAREDENTITY for an entity that is not predefined. */
1885 static HRESULT reader_parse_reference(xmlreader *reader)
1887 encoded_buffer *buffer = &reader->input->buffer->utf16;
1888 WCHAR *start = reader_get_ptr(reader), *ptr;
1889 UINT cur = reader_get_cur(reader);
/* accumulated character reference value; NOTE(review): it is a WCHAR with no
   range check while digits are added, so presumably a longer value is
   truncated - confirm */
1890 WCHAR ch = 0;
1891 int len;
1893 /* skip '&' */
1894 reader_skipn(reader, 1);
1895 ptr = reader_get_ptr(reader);
/* '#' introduces a character reference, anything else is taken as an entity name */
1897 if (*ptr == '#')
1899 reader_skipn(reader, 1);
1900 ptr = reader_get_ptr(reader);
1902 /* hex char or decimal */
1903 if (*ptr == 'x')
1905 reader_skipn(reader, 1);
1906 ptr = reader_get_ptr(reader);
1908 while (*ptr != ';')
1910 if ((*ptr >= '0' && *ptr <= '9'))
1911 ch = ch*16 + *ptr - '0';
1912 else if ((*ptr >= 'a' && *ptr <= 'f'))
1913 ch = ch*16 + *ptr - 'a' + 10;
1914 else if ((*ptr >= 'A' && *ptr <= 'F'))
1915 ch = ch*16 + *ptr - 'A' + 10;
1916 else
/* not a hex digit: with a non-zero value so far the ';' is reported as
   missing, otherwise the digit is reported as invalid */
1917 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1918 reader_skipn(reader, 1);
1919 ptr = reader_get_ptr(reader);
1922 else
1924 while (*ptr != ';')
1926 if ((*ptr >= '0' && *ptr <= '9'))
1928 ch = ch*10 + *ptr - '0';
1929 reader_skipn(reader, 1);
1930 ptr = reader_get_ptr(reader);
1932 else
/* same rule as for the hex form */
1933 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
1937 if (!is_char(ch)) return WC_E_XMLCHARACTER;
1939 /* normalize */
1940 if (is_wchar_space(ch)) ch = ' ';
/* ptr is at ';' here: len is the number of bytes that follow it */
1942 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1943 memmove(start+1, ptr+1, len);
1944 buffer->cur = cur + 1;
1946 *start = ch;
1948 else
1950 strval name;
1951 HRESULT hr;
1953 hr = reader_parse_name(reader, &name);
1954 if (FAILED(hr)) return hr;
1956 ptr = reader_get_ptr(reader);
1957 if (*ptr != ';') return WC_E_SEMICOLON;
1959 /* predefined entities resolve to a single character */
1960 ch = get_predefined_entity(reader, &name);
1961 if (ch)
/* same in-place replacement as for character references */
1963 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
1964 memmove(start+1, ptr+1, len);
1965 buffer->cur = cur + 1;
1967 *start = ch;
1969 else
1971 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
1972 return WC_E_UNDECLAREDENTITY;
1977 return S_OK;
1980 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1981 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
1983 WCHAR *ptr, quote;
1984 UINT start;
1986 ptr = reader_get_ptr(reader);
1988 /* skip opening quote */
1989 quote = *ptr;
1990 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
1991 reader_skipn(reader, 1);
1993 ptr = reader_get_ptr(reader);
1994 start = reader_get_cur(reader);
1995 while (*ptr)
1997 if (*ptr == '<') return WC_E_LESSTHAN;
1999 if (*ptr == quote)
2001 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2002 /* skip closing quote */
2003 reader_skipn(reader, 1);
2004 return S_OK;
2007 if (*ptr == '&')
2009 HRESULT hr = reader_parse_reference(reader);
2010 if (FAILED(hr)) return hr;
2012 else
2014 reader_normalize_space(reader, ptr);
2015 reader_skipn(reader, 1);
2017 ptr = reader_get_ptr(reader);
2020 return WC_E_QUOTE;
2023 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2024 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2025 [3 NS] DefaultAttName ::= 'xmlns'
2026 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2027 static HRESULT reader_parse_attribute(xmlreader *reader)
2029 static const WCHAR xmlnsW[] = {'x','m','l','n','s',0};
2030 strval prefix, local, qname, xmlns, value;
2031 HRESULT hr;
2033 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2034 if (FAILED(hr)) return hr;
2036 reader_init_cstrvalue((WCHAR*)xmlnsW, 5, &xmlns);
2038 if (strval_eq(reader, &prefix, &xmlns))
2040 FIXME("namespace definitions not supported\n");
2041 return E_NOTIMPL;
2044 if (strval_eq(reader, &qname, &xmlns))
2046 FIXME("default namespace definitions not supported\n");
2047 return E_NOTIMPL;
2050 hr = reader_parse_eq(reader);
2051 if (FAILED(hr)) return hr;
2053 hr = reader_parse_attvalue(reader, &value);
2054 if (FAILED(hr)) return hr;
2056 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2057 return reader_add_attr(reader, &local, &value);
2060 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2061 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2062 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2064 HRESULT hr;
2066 hr = reader_parse_qname(reader, prefix, local, qname);
2067 if (FAILED(hr)) return hr;
2069 while (1)
2071 static const WCHAR endW[] = {'/','>',0};
2073 reader_skipspaces(reader);
2075 /* empty element */
2076 if ((*empty = !reader_cmp(reader, endW)))
2078 /* skip '/>' */
2079 reader_skipn(reader, 2);
2080 reader->empty_element = TRUE;
2081 return S_OK;
2084 /* got a start tag */
2085 if (!reader_cmp(reader, gtW))
2087 /* skip '>' */
2088 reader_skipn(reader, 1);
2089 return reader_push_element(reader, qname, local);
2092 hr = reader_parse_attribute(reader);
2093 if (FAILED(hr)) return hr;
2096 return S_OK;
2099 /* [39] element ::= EmptyElemTag | STag content ETag */
2100 static HRESULT reader_parse_element(xmlreader *reader)
2102 HRESULT hr;
2104 switch (reader->resumestate)
2106 case XmlReadResumeState_Initial:
2107 /* check if we are really on element */
2108 if (reader_cmp(reader, ltW)) return S_FALSE;
2110 /* skip '<' */
2111 reader_skipn(reader, 1);
2113 reader_shrink(reader);
2114 reader->resumestate = XmlReadResumeState_STag;
2115 case XmlReadResumeState_STag:
2117 strval qname, prefix, local;
2118 int empty = 0;
2120 /* this handles empty elements too */
2121 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2122 if (FAILED(hr)) return hr;
2124 /* FIXME: need to check for defined namespace to reject invalid prefix,
2125 currently reject all prefixes */
2126 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2128 /* if we got empty element and stack is empty go straight to Misc */
2129 if (empty && list_empty(&reader->elements))
2130 reader->instate = XmlReadInState_MiscEnd;
2131 else
2132 reader->instate = XmlReadInState_Content;
2134 reader->nodetype = XmlNodeType_Element;
2135 reader->resumestate = XmlReadResumeState_Initial;
2136 reader_set_strvalue(reader, StringValue_LocalName, &local);
2137 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2138 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2139 break;
2141 default:
2142 hr = E_FAIL;
2145 return hr;
2148 /* [13 NS] ETag ::= '</' QName S? '>' */
2149 static HRESULT reader_parse_endtag(xmlreader *reader)
2151 strval prefix, local, qname;
2152 struct element *elem;
2153 HRESULT hr;
2155 /* skip '</' */
2156 reader_skipn(reader, 2);
2158 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2159 if (FAILED(hr)) return hr;
2161 reader_skipspaces(reader);
2163 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2165 /* skip '>' */
2166 reader_skipn(reader, 1);
2168 /* Element stack should never be empty at this point, cause we shouldn't get to
2169 content parsing if it's empty. */
2170 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2171 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2173 reader_pop_element(reader);
2175 /* It was a root element, the rest is expected as Misc */
2176 if (list_empty(&reader->elements))
2177 reader->instate = XmlReadInState_MiscEnd;
2179 reader->nodetype = XmlNodeType_EndElement;
2180 reader_set_strvalue(reader, StringValue_LocalName, &local);
2181 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2183 return S_OK;
2186 /* [18] CDSect ::= CDStart CData CDEnd
2187 [19] CDStart ::= '<![CDATA['
2188 [20] CData ::= (Char* - (Char* ']]>' Char*))
2189 [21] CDEnd ::= ']]>' */
2190 static HRESULT reader_parse_cdata(xmlreader *reader)
2192 WCHAR *ptr;
2193 UINT start;
2195 if (reader->resumestate == XmlReadResumeState_CDATA)
2197 start = reader->resume[XmlReadResume_Body];
2198 ptr = reader_get_ptr(reader);
2200 else
2202 /* skip markup '<![CDATA[' */
2203 reader_skipn(reader, 9);
2204 reader_shrink(reader);
2205 ptr = reader_get_ptr(reader);
2206 start = reader_get_cur(reader);
2207 reader->nodetype = XmlNodeType_CDATA;
2208 reader->resume[XmlReadResume_Body] = start;
2209 reader->resumestate = XmlReadResumeState_CDATA;
2210 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2211 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2212 reader_set_strvalue(reader, StringValue_Value, NULL);
2215 while (*ptr)
2217 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2219 strval value;
2221 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2223 /* skip ']]>' */
2224 reader_skipn(reader, 3);
2225 TRACE("%s\n", debug_strval(reader, &value));
2227 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2228 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2229 reader_set_strvalue(reader, StringValue_Value, &value);
2230 reader->resume[XmlReadResume_Body] = 0;
2231 reader->resumestate = XmlReadResumeState_Initial;
2232 return S_OK;
2234 else
2236 /* Value normalization is not fully implemented, rules are:
2238 - single '\r' -> '\n';
2239 - sequence '\r\n' -> '\n', in this case value length changes;
2241 if (*ptr == '\r') *ptr = '\n';
2242 reader_skipn(reader, 1);
2243 ptr++;
2247 return S_OK;
2250 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2251 static HRESULT reader_parse_chardata(xmlreader *reader)
2253 WCHAR *ptr;
2254 UINT start;
2256 if (reader->resumestate == XmlReadResumeState_CharData)
2258 start = reader->resume[XmlReadResume_Body];
2259 ptr = reader_get_ptr(reader);
2261 else
2263 reader_shrink(reader);
2264 ptr = reader_get_ptr(reader);
2265 start = reader_get_cur(reader);
2266 /* There's no text */
2267 if (!*ptr || *ptr == '<') return S_OK;
2268 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2269 reader->resume[XmlReadResume_Body] = start;
2270 reader->resumestate = XmlReadResumeState_CharData;
2271 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2272 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2273 reader_set_strvalue(reader, StringValue_Value, NULL);
2276 while (*ptr)
2278 /* CDATA closing sequence ']]>' is not allowed */
2279 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2280 return WC_E_CDSECTEND;
2282 /* Found next markup part */
2283 if (ptr[0] == '<')
2285 strval value;
2287 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2288 reader_set_strvalue(reader, StringValue_Value, &value);
2289 reader->resume[XmlReadResume_Body] = 0;
2290 reader->resumestate = XmlReadResumeState_Initial;
2291 return S_OK;
2294 reader_skipn(reader, 1);
2296 /* this covers a case when text has leading whitespace chars */
2297 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2298 ptr++;
2301 return S_OK;
2304 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2305 static HRESULT reader_parse_content(xmlreader *reader)
2307 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2308 static const WCHAR etagW[] = {'<','/',0};
2309 static const WCHAR ampW[] = {'&',0};
2311 if (reader->resumestate != XmlReadResumeState_Initial)
2313 switch (reader->resumestate)
2315 case XmlReadResumeState_CDATA:
2316 return reader_parse_cdata(reader);
2317 case XmlReadResumeState_Comment:
2318 return reader_parse_comment(reader);
2319 case XmlReadResumeState_PIBody:
2320 case XmlReadResumeState_PITarget:
2321 return reader_parse_pi(reader);
2322 case XmlReadResumeState_CharData:
2323 return reader_parse_chardata(reader);
2324 default:
2325 ERR("unknown resume state %d\n", reader->resumestate);
2329 reader_shrink(reader);
2331 /* handle end tag here, it indicates end of content as well */
2332 if (!reader_cmp(reader, etagW))
2333 return reader_parse_endtag(reader);
2335 if (!reader_cmp(reader, commentW))
2336 return reader_parse_comment(reader);
2338 if (!reader_cmp(reader, piW))
2339 return reader_parse_pi(reader);
2341 if (!reader_cmp(reader, cdstartW))
2342 return reader_parse_cdata(reader);
2344 if (!reader_cmp(reader, ampW))
2345 return reader_parse_reference(reader);
2347 if (!reader_cmp(reader, ltW))
2348 return reader_parse_element(reader);
2350 /* what's left must be CharData */
2351 return reader_parse_chardata(reader);
/* Advances the reader to the next node by running the document level state
   machine kept in reader->instate. States that find nothing to parse (S_FALSE
   from their parser) switch to the following state and the loop goes round
   again; a parsed node, a failure, or the end of the document returns.

   Returns the failure code of whichever step failed, S_FALSE at the end of
   the document, E_NOTIMPL for an unhandled state, and otherwise the result of
   the parser that produced the node. */
2354 static HRESULT reader_parse_nextnode(xmlreader *reader)
2356 HRESULT hr;
/* attributes of the previous node are kept while the reader is pending */
2358 if (!is_reader_pending(reader))
2359 reader_clear_attrs(reader);
2361 while (1)
2363 switch (reader->instate)
2365 /* if it's a first call for a new input we need to detect stream encoding */
2366 case XmlReadInState_Initial:
2368 xml_encoding enc;
2370 hr = readerinput_growraw(reader->input);
2371 if (FAILED(hr)) return hr;
2373 /* try to detect encoding by BOM or data and set input code page */
2374 hr = readerinput_detectencoding(reader->input, &enc);
/* NOTE(review): enc is used for tracing before hr is checked - confirm that
   readerinput_detectencoding() always sets it and that the value is a valid
   index into xml_encoding_map */
2375 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2376 if (FAILED(hr)) return hr;
2378 /* always switch first time cause we have to put something in */
2379 readerinput_switchencoding(reader->input, enc);
2381 /* parse xml declaration */
2382 hr = reader_parse_xmldecl(reader);
2383 if (FAILED(hr)) return hr;
2385 readerinput_shrinkraw(reader->input, -1);
2386 reader->instate = XmlReadInState_Misc_DTD;
/* S_OK means a declaration node was produced; otherwise carry on with Misc */
2387 if (hr == S_OK) return hr;
2389 break;
/* Misc between the xml declaration and the doctype declaration */
2390 case XmlReadInState_Misc_DTD:
2391 hr = reader_parse_misc(reader);
2392 if (FAILED(hr)) return hr;
2394 if (hr == S_FALSE)
2395 reader->instate = XmlReadInState_DTD;
2396 else
2397 return hr;
2398 break;
2399 case XmlReadInState_DTD:
2400 hr = reader_parse_dtd(reader);
2401 if (FAILED(hr)) return hr;
2403 if (hr == S_OK)
2405 reader->instate = XmlReadInState_DTD_Misc;
2406 return hr;
2408 else
/* no doctype declaration, the root element is next */
2409 reader->instate = XmlReadInState_Element;
2410 break;
/* Misc between the doctype declaration and the root element */
2411 case XmlReadInState_DTD_Misc:
2412 hr = reader_parse_misc(reader);
2413 if (FAILED(hr)) return hr;
2415 if (hr == S_FALSE)
2416 reader->instate = XmlReadInState_Element;
2417 else
2418 return hr;
2419 break;
2420 case XmlReadInState_Element:
2421 return reader_parse_element(reader);
2422 case XmlReadInState_Content:
2423 return reader_parse_content(reader);
2424 case XmlReadInState_MiscEnd:
2425 hr = reader_parse_misc(reader);
2426 if (FAILED(hr)) return hr;
/* nothing left after the root element: S_FALSE is returned for this call too */
2428 if (hr == S_FALSE)
2429 reader->instate = XmlReadInState_Eof;
2430 return hr;
2431 case XmlReadInState_Eof:
2432 return S_FALSE;
2433 default:
2434 FIXME("internal state %d not handled\n", reader->instate);
2435 return E_NOTIMPL;
/* not reached, every path through the loop above either returns or loops again */
2439 return E_NOTIMPL;
2442 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2444 xmlreader *This = impl_from_IXmlReader(iface);
2446 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2448 if (IsEqualGUID(riid, &IID_IUnknown) ||
2449 IsEqualGUID(riid, &IID_IXmlReader))
2451 *ppvObject = iface;
2453 else
2455 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2456 *ppvObject = NULL;
2457 return E_NOINTERFACE;
2460 IXmlReader_AddRef(iface);
2462 return S_OK;
2465 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2467 xmlreader *This = impl_from_IXmlReader(iface);
2468 ULONG ref = InterlockedIncrement(&This->ref);
2469 TRACE("(%p)->(%d)\n", This, ref);
2470 return ref;
2473 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2475 xmlreader *This = impl_from_IXmlReader(iface);
2476 LONG ref = InterlockedDecrement(&This->ref);
2478 TRACE("(%p)->(%d)\n", This, ref);
2480 if (ref == 0)
2482 IMalloc *imalloc = This->imalloc;
2483 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2484 reader_clear_attrs(This);
2485 reader_clear_elements(This);
2486 reader_free_strvalues(This);
2487 reader_free(This, This);
2488 if (imalloc) IMalloc_Release(imalloc);
2491 return ref;
2494 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2496 xmlreader *This = impl_from_IXmlReader(iface);
2497 IXmlReaderInput *readerinput;
2498 HRESULT hr;
2500 TRACE("(%p)->(%p)\n", This, input);
2502 if (This->input)
2504 readerinput_release_stream(This->input);
2505 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2506 This->input = NULL;
2509 This->line = This->pos = 0;
2510 reader_clear_elements(This);
2511 This->depth = 0;
2512 This->resumestate = XmlReadResumeState_Initial;
2513 memset(This->resume, 0, sizeof(This->resume));
2515 /* just reset current input */
2516 if (!input)
2518 This->state = XmlReadState_Initial;
2519 return S_OK;
2522 /* now try IXmlReaderInput, ISequentialStream, IStream */
2523 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2524 if (hr == S_OK)
2526 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2527 This->input = impl_from_IXmlReaderInput(readerinput);
2528 else
2530 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2531 readerinput, readerinput->lpVtbl);
2532 IUnknown_Release(readerinput);
2533 return E_FAIL;
2538 if (hr != S_OK || !readerinput)
2540 /* create IXmlReaderInput basing on supplied interface */
2541 hr = CreateXmlReaderInputWithEncodingName(input,
2542 NULL, NULL, FALSE, NULL, &readerinput);
2543 if (hr != S_OK) return hr;
2544 This->input = impl_from_IXmlReaderInput(readerinput);
2547 /* set stream for supplied IXmlReaderInput */
2548 hr = readerinput_query_for_stream(This->input);
2549 if (hr == S_OK)
2551 This->state = XmlReadState_Initial;
2552 This->instate = XmlReadInState_Initial;
2555 return hr;
2558 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2560 xmlreader *This = impl_from_IXmlReader(iface);
2562 TRACE("(%p)->(%s %p)\n", This, debugstr_prop(property), value);
2564 if (!value) return E_INVALIDARG;
2566 switch (property)
2568 case XmlReaderProperty_DtdProcessing:
2569 *value = This->dtdmode;
2570 break;
2571 case XmlReaderProperty_ReadState:
2572 *value = This->state;
2573 break;
2574 default:
2575 FIXME("Unimplemented property (%u)\n", property);
2576 return E_NOTIMPL;
2579 return S_OK;
2582 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2584 xmlreader *This = impl_from_IXmlReader(iface);
2586 TRACE("(%p)->(%s %lu)\n", This, debugstr_prop(property), value);
2588 switch (property)
2590 case XmlReaderProperty_DtdProcessing:
2591 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2592 This->dtdmode = value;
2593 break;
2594 default:
2595 FIXME("Unimplemented property (%u)\n", property);
2596 return E_NOTIMPL;
2599 return S_OK;
2602 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2604 xmlreader *This = impl_from_IXmlReader(iface);
2605 XmlNodeType oldtype = This->nodetype;
2606 HRESULT hr;
2608 TRACE("(%p)->(%p)\n", This, nodetype);
2610 if (This->state == XmlReadState_Closed) return S_FALSE;
2612 hr = reader_parse_nextnode(This);
2613 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2614 This->state = XmlReadState_Interactive;
2615 if (hr == S_OK)
2617 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2618 *nodetype = This->nodetype;
2621 return hr;
2624 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2626 xmlreader *This = impl_from_IXmlReader(iface);
2627 TRACE("(%p)->(%p)\n", This, node_type);
2629 *node_type = reader_get_nodetype(This);
2630 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2633 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2635 xmlreader *This = impl_from_IXmlReader(iface);
2637 TRACE("(%p)\n", This);
2639 if (!This->attr_count) return S_FALSE;
2640 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2641 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2642 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2644 return S_OK;
2647 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2649 xmlreader *This = impl_from_IXmlReader(iface);
2650 const struct list *next;
2652 TRACE("(%p)\n", This);
2654 if (!This->attr_count) return S_FALSE;
2656 if (!This->attr)
2657 return IXmlReader_MoveToFirstAttribute(iface);
2659 next = list_next(&This->attrs, &This->attr->entry);
2660 if (next)
2662 This->attr = LIST_ENTRY(next, struct attribute, entry);
2663 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2664 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2667 return next ? S_OK : S_FALSE;
2670 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2671 LPCWSTR local_name,
2672 LPCWSTR namespaceUri)
2674 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2675 return E_NOTIMPL;
2678 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2680 xmlreader *This = impl_from_IXmlReader(iface);
2681 struct element *elem;
2683 TRACE("(%p)\n", This);
2685 if (!This->attr_count) return S_FALSE;
2686 This->attr = NULL;
2688 /* FIXME: support other node types with 'attributes' like DTD */
2689 elem = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2690 if (elem)
2692 reader_set_strvalue(This, StringValue_QualifiedName, &elem->qname);
2693 reader_set_strvalue(This, StringValue_LocalName, &elem->localname);
2696 return S_OK;
2699 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2701 xmlreader *This = impl_from_IXmlReader(iface);
2703 TRACE("(%p)->(%p %p)\n", This, name, len);
2704 *name = This->strvalues[StringValue_QualifiedName].str;
2705 *len = This->strvalues[StringValue_QualifiedName].len;
2706 return S_OK;
2709 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2710 LPCWSTR *namespaceUri,
2711 UINT *namespaceUri_length)
2713 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2714 return E_NOTIMPL;
2717 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2719 xmlreader *This = impl_from_IXmlReader(iface);
2721 TRACE("(%p)->(%p %p)\n", This, name, len);
2722 *name = This->strvalues[StringValue_LocalName].str;
2723 if (len) *len = This->strvalues[StringValue_LocalName].len;
2724 return S_OK;
2727 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2729 xmlreader *This = impl_from_IXmlReader(iface);
2731 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2732 *prefix = This->strvalues[StringValue_Prefix].str;
2733 if (len) *len = This->strvalues[StringValue_Prefix].len;
2734 return S_OK;
2737 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2739 xmlreader *reader = impl_from_IXmlReader(iface);
2740 strval *val = &reader->strvalues[StringValue_Value];
2742 TRACE("(%p)->(%p %p)\n", reader, value, len);
2744 *value = NULL;
2746 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2748 XmlNodeType type;
2749 HRESULT hr;
2751 hr = IXmlReader_Read(iface, &type);
2752 if (FAILED(hr)) return hr;
2754 /* return if still pending, partially read values are not reported */
2755 if (is_reader_pending(reader)) return E_PENDING;
2758 if (!val->str)
2760 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2761 if (!ptr) return E_OUTOFMEMORY;
2762 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2763 ptr[val->len] = 0;
2764 val->str = ptr;
2767 *value = val->str;
2768 if (len) *len = val->len;
2769 return S_OK;
2772 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2774 xmlreader *reader = impl_from_IXmlReader(iface);
2775 strval *val = &reader->strvalues[StringValue_Value];
2776 UINT len;
2778 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2780 /* Value is already allocated, chunked reads are not possible. */
2781 if (val->str) return S_FALSE;
2783 if (val->len)
2785 len = min(chunk_size, val->len);
2786 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2787 val->start += len;
2788 val->len -= len;
2789 if (read) *read = len;
2792 return S_OK;
2795 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2796 LPCWSTR *baseUri,
2797 UINT *baseUri_length)
2799 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2800 return E_NOTIMPL;
2803 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2805 FIXME("(%p): stub\n", iface);
2806 return FALSE;
2809 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2811 xmlreader *This = impl_from_IXmlReader(iface);
2812 TRACE("(%p)\n", This);
2813 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2814 when current node is start tag of an element */
2815 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2818 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2820 xmlreader *This = impl_from_IXmlReader(iface);
2822 TRACE("(%p %p)\n", This, lineNumber);
2824 if (!lineNumber) return E_INVALIDARG;
2826 *lineNumber = This->line;
2828 return S_OK;
2831 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2833 xmlreader *This = impl_from_IXmlReader(iface);
2835 TRACE("(%p %p)\n", This, linePosition);
2837 if (!linePosition) return E_INVALIDARG;
2839 *linePosition = This->pos;
2841 return S_OK;
2844 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2846 xmlreader *This = impl_from_IXmlReader(iface);
2848 TRACE("(%p)->(%p)\n", This, count);
2850 if (!count) return E_INVALIDARG;
2852 *count = This->attr_count;
2853 return S_OK;
2856 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2858 xmlreader *This = impl_from_IXmlReader(iface);
2859 TRACE("(%p)->(%p)\n", This, depth);
2860 *depth = This->depth;
2861 return S_OK;
2864 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2866 FIXME("(%p): stub\n", iface);
2867 return E_NOTIMPL;
2870 static const struct IXmlReaderVtbl xmlreader_vtbl =
2872 xmlreader_QueryInterface,
2873 xmlreader_AddRef,
2874 xmlreader_Release,
2875 xmlreader_SetInput,
2876 xmlreader_GetProperty,
2877 xmlreader_SetProperty,
2878 xmlreader_Read,
2879 xmlreader_GetNodeType,
2880 xmlreader_MoveToFirstAttribute,
2881 xmlreader_MoveToNextAttribute,
2882 xmlreader_MoveToAttributeByName,
2883 xmlreader_MoveToElement,
2884 xmlreader_GetQualifiedName,
2885 xmlreader_GetNamespaceUri,
2886 xmlreader_GetLocalName,
2887 xmlreader_GetPrefix,
2888 xmlreader_GetValue,
2889 xmlreader_ReadValueChunk,
2890 xmlreader_GetBaseUri,
2891 xmlreader_IsDefault,
2892 xmlreader_IsEmptyElement,
2893 xmlreader_GetLineNumber,
2894 xmlreader_GetLinePosition,
2895 xmlreader_GetAttributeCount,
2896 xmlreader_GetDepth,
2897 xmlreader_IsEOF
2900 /** IXmlReaderInput **/
2901 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2903 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2905 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2907 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2908 IsEqualGUID(riid, &IID_IUnknown))
2910 *ppvObject = iface;
2912 else
2914 WARN("interface %s not implemented\n", debugstr_guid(riid));
2915 *ppvObject = NULL;
2916 return E_NOINTERFACE;
2919 IUnknown_AddRef(iface);
2921 return S_OK;
2924 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2926 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2927 ULONG ref = InterlockedIncrement(&This->ref);
2928 TRACE("(%p)->(%d)\n", This, ref);
2929 return ref;
2932 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2934 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2935 LONG ref = InterlockedDecrement(&This->ref);
2937 TRACE("(%p)->(%d)\n", This, ref);
2939 if (ref == 0)
2941 IMalloc *imalloc = This->imalloc;
2942 if (This->input) IUnknown_Release(This->input);
2943 if (This->stream) ISequentialStream_Release(This->stream);
2944 if (This->buffer) free_input_buffer(This->buffer);
2945 readerinput_free(This, This->baseuri);
2946 readerinput_free(This, This);
2947 if (imalloc) IMalloc_Release(imalloc);
2950 return ref;
2953 static const struct IUnknownVtbl xmlreaderinputvtbl =
2955 xmlreaderinput_QueryInterface,
2956 xmlreaderinput_AddRef,
2957 xmlreaderinput_Release
2960 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2962 xmlreader *reader;
2963 int i;
2965 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2967 if (!IsEqualGUID(riid, &IID_IXmlReader))
2969 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2970 return E_FAIL;
2973 if (imalloc)
2974 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2975 else
2976 reader = heap_alloc(sizeof(*reader));
2977 if(!reader) return E_OUTOFMEMORY;
2979 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2980 reader->ref = 1;
2981 reader->input = NULL;
2982 reader->state = XmlReadState_Closed;
2983 reader->instate = XmlReadInState_Initial;
2984 reader->resumestate = XmlReadResumeState_Initial;
2985 reader->dtdmode = DtdProcessing_Prohibit;
2986 reader->line = reader->pos = 0;
2987 reader->imalloc = imalloc;
2988 if (imalloc) IMalloc_AddRef(imalloc);
2989 reader->nodetype = XmlNodeType_None;
2990 list_init(&reader->attrs);
2991 reader->attr_count = 0;
2992 reader->attr = NULL;
2993 list_init(&reader->elements);
2994 reader->depth = 0;
2995 reader->max_depth = 256;
2996 reader->empty_element = FALSE;
2997 memset(reader->resume, 0, sizeof(reader->resume));
2999 for (i = 0; i < StringValue_Last; i++)
3000 reader->strvalues[i] = strval_empty;
3002 *obj = &reader->IXmlReader_iface;
3004 TRACE("returning iface %p\n", *obj);
3006 return S_OK;
3009 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3010 IMalloc *imalloc,
3011 LPCWSTR encoding,
3012 BOOL hint,
3013 LPCWSTR base_uri,
3014 IXmlReaderInput **ppInput)
3016 xmlreaderinput *readerinput;
3017 HRESULT hr;
3019 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3020 hint, wine_dbgstr_w(base_uri), ppInput);
3022 if (!stream || !ppInput) return E_INVALIDARG;
3024 if (imalloc)
3025 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3026 else
3027 readerinput = heap_alloc(sizeof(*readerinput));
3028 if(!readerinput) return E_OUTOFMEMORY;
3030 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3031 readerinput->ref = 1;
3032 readerinput->imalloc = imalloc;
3033 readerinput->stream = NULL;
3034 if (imalloc) IMalloc_AddRef(imalloc);
3035 readerinput->encoding = parse_encoding_name(encoding, -1);
3036 readerinput->hint = hint;
3037 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3038 readerinput->pending = 0;
3040 hr = alloc_input_buffer(readerinput);
3041 if (hr != S_OK)
3043 readerinput_free(readerinput, readerinput->baseuri);
3044 readerinput_free(readerinput, readerinput);
3045 if (imalloc) IMalloc_Release(imalloc);
3046 return hr;
3048 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3050 *ppInput = &readerinput->IXmlReaderInput_iface;
3052 TRACE("returning iface %p\n", *ppInput);
3054 return S_OK;