xmllite: Handle nesting depth limit better.
[wine/wine-gecko.git] / dlls / xmllite / reader.c
blob b556d7daa50d4772fe2dd2e30aed40fc5244b4be
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Encodings the reader can handle; the values are used to index xml_encoding_map[]. */
typedef enum
{
    XmlEncoding_UTF16 = 0,
    XmlEncoding_UTF8,
    XmlEncoding_Unknown    /* anything not present in the map */
} xml_encoding;
/* Internal parser state, stored in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,   /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records which construct was being parsed when an input problem
   interrupted parsing; the reader uses it to resume. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData
} XmlReaderResumeState;
/* Index into xmlreader.resume[]: saved pointers used to resume from a
   particular input position. */
typedef enum
{
    XmlReadResume_Name = 0, /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local,    /* local for QName */
    XmlReadResume_Body,     /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last      /* number of slots */
} XmlReaderResume;
/* Index into xmlreader.strvalues[]. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last          /* number of slots */
} XmlReaderStringValue;
92 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
93 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
95 static const WCHAR dblquoteW[] = {'\"',0};
96 static const WCHAR quoteW[] = {'\'',0};
97 static const WCHAR ltW[] = {'<',0};
98 static const WCHAR gtW[] = {'>',0};
99 static const WCHAR commentW[] = {'<','!','-','-',0};
100 static const WCHAR piW[] = {'<','?',0};
102 static const char *debugstr_nodetype(XmlNodeType nodetype)
104 static const char* type_names[] =
106 "None",
107 "Element",
108 "Attribute",
109 "Text",
110 "CDATA",
113 "ProcessingInstruction",
114 "Comment",
116 "DocumentType",
119 "Whitespace",
121 "EndElement",
123 "XmlDeclaration"
126 if (nodetype > _XmlNodeType_Last)
128 static char buf[25];
129 sprintf(buf, "unknown type=%d", nodetype);
130 return buf;
132 return type_names[nodetype];
135 static const char *debugstr_prop(XmlReaderProperty prop)
137 static const char* prop_names[] =
139 "MultiLanguage",
140 "ConformanceLevel",
141 "RandomAccess",
142 "XmlResolver",
143 "DtdProcessing",
144 "ReadState",
145 "MaxElementDepth",
146 "MaxEntityExpansion"
149 if (prop > _XmlReaderProperty_Last)
151 static char buf[25];
152 sprintf(buf, "unknown property=%d", prop);
153 return buf;
155 return prop_names[prop];
158 struct xml_encoding_data
160 const WCHAR *name;
161 xml_encoding enc;
162 UINT cp;
165 static const struct xml_encoding_data xml_encoding_map[] = {
166 { utf16W, XmlEncoding_UTF16, ~0 },
167 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
/* Growable byte buffer with a read cursor. */
typedef struct
{
    char *data;              /* start of the allocation */
    char *cur;               /* current position inside data */
    unsigned int allocated;  /* allocation size, in bytes */
    unsigned int written;    /* bytes of valid content */
} encoded_buffer;
178 typedef struct input_buffer input_buffer;
180 typedef struct
182 IXmlReaderInput IXmlReaderInput_iface;
183 LONG ref;
184 /* reference passed on IXmlReaderInput creation, is kept when input is created */
185 IUnknown *input;
186 IMalloc *imalloc;
187 xml_encoding encoding;
188 BOOL hint;
189 WCHAR *baseuri;
190 /* stream reference set after SetInput() call from reader,
191 stored as sequential stream, cause currently
192 optimizations possible with IStream aren't implemented */
193 ISequentialStream *stream;
194 input_buffer *buffer;
195 unsigned int pending : 1;
196 } xmlreaderinput;
198 static const struct IUnknownVtbl xmlreaderinputvtbl;
200 /* Structure to hold parsed string of specific length.
202 Reader stores node value as 'start' pointer, on request
203 a null-terminated version of it is allocated.
205 To init a strval variable use reader_init_strval(),
206 to set strval as a reader value use reader_set_strval().
208 typedef struct
210 WCHAR *start; /* input position where value starts */
211 UINT len; /* length in WCHARs, altered after ReadValueChunk */
212 WCHAR *str; /* allocated null-terminated string */
213 } strval;
215 static WCHAR emptyW[] = {0};
216 static const strval strval_empty = {emptyW, 0, emptyW};
218 struct attribute
220 struct list entry;
221 strval localname;
222 strval value;
225 struct element
227 struct list entry;
228 strval qname;
231 typedef struct
233 IXmlReader IXmlReader_iface;
234 LONG ref;
235 xmlreaderinput *input;
236 IMalloc *imalloc;
237 XmlReadState state;
238 XmlReaderInternalState instate;
239 XmlReaderResumeState resumestate;
240 XmlNodeType nodetype;
241 DtdProcessing dtdmode;
242 UINT line, pos; /* reader position in XML stream */
243 struct list attrs; /* attributes list for current node */
244 struct attribute *attr; /* current attribute */
245 UINT attr_count;
246 struct list elements;
247 strval strvalues[StringValue_Last];
248 UINT depth;
249 UINT max_depth;
250 BOOL empty_element;
251 WCHAR *resume[XmlReadResume_Last]; /* pointers used to resume reader */
252 } xmlreader;
254 struct input_buffer
256 encoded_buffer utf16;
257 encoded_buffer encoded;
258 UINT code_page;
259 xmlreaderinput *input;
262 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
264 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
267 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
269 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
272 static inline void *m_alloc(IMalloc *imalloc, size_t len)
274 if (imalloc)
275 return IMalloc_Alloc(imalloc, len);
276 else
277 return heap_alloc(len);
280 static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
282 if (imalloc)
283 return IMalloc_Realloc(imalloc, mem, len);
284 else
285 return heap_realloc(mem, len);
288 static inline void m_free(IMalloc *imalloc, void *mem)
290 if (imalloc)
291 IMalloc_Free(imalloc, mem);
292 else
293 heap_free(mem);
296 /* reader memory allocation functions */
297 static inline void *reader_alloc(xmlreader *reader, size_t len)
299 return m_alloc(reader->imalloc, len);
302 static inline void reader_free(xmlreader *reader, void *mem)
304 m_free(reader->imalloc, mem);
307 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
309 *dest = *src;
311 if (src->str != strval_empty.str)
313 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
314 if (!dest->str) return E_OUTOFMEMORY;
315 memcpy(dest->str, src->str, dest->len*sizeof(WCHAR));
316 dest->str[dest->len] = 0;
319 return S_OK;
322 /* reader input memory allocation functions */
323 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
325 return m_alloc(input->imalloc, len);
328 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
330 return m_realloc(input->imalloc, mem, len);
333 static inline void readerinput_free(xmlreaderinput *input, void *mem)
335 m_free(input->imalloc, mem);
338 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
340 LPWSTR ret = NULL;
342 if(str) {
343 DWORD size;
345 size = (strlenW(str)+1)*sizeof(WCHAR);
346 ret = readerinput_alloc(input, size);
347 if (ret) memcpy(ret, str, size);
350 return ret;
353 static void reader_clear_attrs(xmlreader *reader)
355 struct attribute *attr, *attr2;
356 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
358 reader_free(reader, attr);
360 list_init(&reader->attrs);
361 reader->attr_count = 0;
364 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
365 while we are on a node with attributes */
366 static HRESULT reader_add_attr(xmlreader *reader, strval *localname, strval *value)
368 struct attribute *attr;
370 attr = reader_alloc(reader, sizeof(*attr));
371 if (!attr) return E_OUTOFMEMORY;
373 attr->localname = *localname;
374 attr->value = *value;
375 list_add_tail(&reader->attrs, &attr->entry);
376 reader->attr_count++;
378 return S_OK;
381 /* This one frees stored string value if needed */
382 static void reader_free_strvalued(xmlreader *reader, strval *v)
384 if (v->str != strval_empty.str)
386 reader_free(reader, v->str);
387 *v = strval_empty;
391 static inline void reader_init_strvalue(WCHAR *str, UINT len, strval *v)
393 v->start = v->str = str;
394 v->len = len;
397 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
399 reader_free_strvalued(reader, &reader->strvalues[type]);
402 static void reader_free_strvalues(xmlreader *reader)
404 int type;
405 for (type = 0; type < StringValue_Last; type++)
406 reader_free_strvalue(reader, type);
409 /* This helper should only be used to test if strings are the same,
410 it doesn't try to sort. */
411 static inline int strval_eq(const strval *str1, const strval *str2)
413 if (str1->len != str2->len) return 0;
414 return !memcmp(str1->str, str2->str, str1->len*sizeof(WCHAR));
417 static void reader_clear_elements(xmlreader *reader)
419 struct element *elem, *elem2;
420 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
422 reader_free_strvalued(reader, &elem->qname);
423 reader_free(reader, elem);
425 list_init(&reader->elements);
426 reader->empty_element = FALSE;
429 static HRESULT reader_inc_depth(xmlreader *reader)
431 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
432 return S_OK;
435 static void reader_dec_depth(xmlreader *reader)
437 if (reader->depth > 1) reader->depth--;
440 static HRESULT reader_push_element(xmlreader *reader, strval *qname)
442 struct element *elem;
443 HRESULT hr;
445 elem = reader_alloc(reader, sizeof(*elem));
446 if (!elem) return E_OUTOFMEMORY;
448 hr = reader_strvaldup(reader, qname, &elem->qname);
449 if (FAILED(hr)) {
450 reader_free(reader, elem);
451 return hr;
454 if (!list_empty(&reader->elements))
456 hr = reader_inc_depth(reader);
457 if (FAILED(hr)) {
458 reader_free(reader, elem);
459 return hr;
463 list_add_head(&reader->elements, &elem->entry);
464 reader->empty_element = FALSE;
465 return hr;
468 static void reader_pop_element(xmlreader *reader)
470 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
472 if (elem)
474 list_remove(&elem->entry);
475 reader_free_strvalued(reader, &elem->qname);
476 reader_free(reader, elem);
477 reader_dec_depth(reader);
481 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
482 means node value is to be determined. */
483 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
485 strval *v = &reader->strvalues[type];
487 reader_free_strvalue(reader, type);
488 if (!value)
490 v->str = NULL;
491 v->start = NULL;
492 v->len = 0;
493 return;
496 if (value->str == strval_empty.str)
497 *v = *value;
498 else
500 if (type == StringValue_Value)
502 /* defer allocation for value string */
503 v->str = NULL;
504 v->start = value->start;
505 v->len = value->len;
507 else
509 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
510 memcpy(v->str, value->start, value->len*sizeof(WCHAR));
511 v->str[value->len] = 0;
512 v->len = value->len;
517 static inline int is_reader_pending(xmlreader *reader)
519 return reader->input->pending;
522 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
524 const int initial_len = 0x2000;
525 buffer->data = readerinput_alloc(input, initial_len);
526 if (!buffer->data) return E_OUTOFMEMORY;
528 memset(buffer->data, 0, 4);
529 buffer->cur = buffer->data;
530 buffer->allocated = initial_len;
531 buffer->written = 0;
533 return S_OK;
536 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
538 readerinput_free(input, buffer->data);
541 static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
543 if (encoding == XmlEncoding_Unknown)
545 FIXME("unsupported encoding %d\n", encoding);
546 return E_NOTIMPL;
549 *cp = xml_encoding_map[encoding].cp;
551 return S_OK;
554 static xml_encoding parse_encoding_name(const WCHAR *name, int len)
556 int min, max, n, c;
558 if (!name) return XmlEncoding_Unknown;
560 min = 0;
561 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
563 while (min <= max)
565 n = (min+max)/2;
567 if (len != -1)
568 c = strncmpiW(xml_encoding_map[n].name, name, len);
569 else
570 c = strcmpiW(xml_encoding_map[n].name, name);
571 if (!c)
572 return xml_encoding_map[n].enc;
574 if (c > 0)
575 max = n-1;
576 else
577 min = n+1;
580 return XmlEncoding_Unknown;
583 static HRESULT alloc_input_buffer(xmlreaderinput *input)
585 input_buffer *buffer;
586 HRESULT hr;
588 input->buffer = NULL;
590 buffer = readerinput_alloc(input, sizeof(*buffer));
591 if (!buffer) return E_OUTOFMEMORY;
593 buffer->input = input;
594 buffer->code_page = ~0; /* code page is unknown at this point */
595 hr = init_encoded_buffer(input, &buffer->utf16);
596 if (hr != S_OK) {
597 readerinput_free(input, buffer);
598 return hr;
601 hr = init_encoded_buffer(input, &buffer->encoded);
602 if (hr != S_OK) {
603 free_encoded_buffer(input, &buffer->utf16);
604 readerinput_free(input, buffer);
605 return hr;
608 input->buffer = buffer;
609 return S_OK;
612 static void free_input_buffer(input_buffer *buffer)
614 free_encoded_buffer(buffer->input, &buffer->encoded);
615 free_encoded_buffer(buffer->input, &buffer->utf16);
616 readerinput_free(buffer->input, buffer);
619 static void readerinput_release_stream(xmlreaderinput *readerinput)
621 if (readerinput->stream) {
622 ISequentialStream_Release(readerinput->stream);
623 readerinput->stream = NULL;
627 /* Queries already stored interface for IStream/ISequentialStream.
628 Interface supplied on creation will be overwritten */
629 static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
631 HRESULT hr;
633 readerinput_release_stream(readerinput);
634 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
635 if (hr != S_OK)
636 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
638 return hr;
641 /* reads a chunk to raw buffer */
642 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
644 encoded_buffer *buffer = &readerinput->buffer->encoded;
645 /* to make sure aligned length won't exceed allocated length */
646 ULONG len = buffer->allocated - buffer->written - 4;
647 ULONG read;
648 HRESULT hr;
650 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
651 variable width encodings like UTF-8 */
652 len = (len + 3) & ~3;
653 /* try to use allocated space or grow */
654 if (buffer->allocated - buffer->written < len)
656 buffer->allocated *= 2;
657 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
658 len = buffer->allocated - buffer->written;
661 read = 0;
662 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
663 TRACE("requested %d, read %d, ret 0x%08x\n", len, read, hr);
664 readerinput->pending = hr == E_PENDING;
665 if (FAILED(hr)) return hr;
666 buffer->written += read;
668 return hr;
671 /* grows UTF-16 buffer so it has at least 'length' bytes free on return */
672 static void readerinput_grow(xmlreaderinput *readerinput, int length)
674 encoded_buffer *buffer = &readerinput->buffer->utf16;
676 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
677 if (buffer->allocated < buffer->written + length + 4)
679 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
680 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
681 buffer->allocated = grown_size;
685 static inline int readerinput_is_utf8(xmlreaderinput *readerinput)
687 static char startA[] = {'<','?'};
688 static char commentA[] = {'<','!'};
689 encoded_buffer *buffer = &readerinput->buffer->encoded;
690 unsigned char *ptr = (unsigned char*)buffer->data;
692 return !memcmp(buffer->data, startA, sizeof(startA)) ||
693 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
694 /* test start byte */
695 (ptr[0] == '<' &&
697 (ptr[1] && (ptr[1] <= 0x7f)) ||
698 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
699 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
700 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
704 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
706 encoded_buffer *buffer = &readerinput->buffer->encoded;
707 static WCHAR startW[] = {'<','?'};
708 static WCHAR commentW[] = {'<','!'};
709 static char utf8bom[] = {0xef,0xbb,0xbf};
710 static char utf16lebom[] = {0xff,0xfe};
712 *enc = XmlEncoding_Unknown;
714 if (buffer->written <= 3)
716 HRESULT hr = readerinput_growraw(readerinput);
717 if (FAILED(hr)) return hr;
718 if (buffer->written <= 3) return MX_E_INPUTEND;
721 /* try start symbols if we have enough data to do that, input buffer should contain
722 first chunk already */
723 if (readerinput_is_utf8(readerinput))
724 *enc = XmlEncoding_UTF8;
725 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
726 !memcmp(buffer->data, commentW, sizeof(commentW)))
727 *enc = XmlEncoding_UTF16;
728 /* try with BOM now */
729 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
731 buffer->cur += sizeof(utf8bom);
732 *enc = XmlEncoding_UTF8;
734 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
736 buffer->cur += sizeof(utf16lebom);
737 *enc = XmlEncoding_UTF16;
740 return S_OK;
743 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
745 encoded_buffer *buffer = &readerinput->buffer->encoded;
746 int len = buffer->written;
748 /* complete single byte char */
749 if (!(buffer->data[len-1] & 0x80)) return len;
751 /* find start byte of multibyte char */
752 while (--len && !(buffer->data[len] & 0xc0))
755 return len;
758 /* Returns byte length of complete char sequence for buffer code page,
759 it's relative to current buffer position which is currently used for BOM handling
760 only. */
761 static int readerinput_get_convlen(xmlreaderinput *readerinput)
763 encoded_buffer *buffer = &readerinput->buffer->encoded;
764 int len;
766 if (readerinput->buffer->code_page == CP_UTF8)
767 len = readerinput_get_utf8_convlen(readerinput);
768 else
769 len = buffer->written;
771 TRACE("%d\n", len - (int)(buffer->cur - buffer->data));
772 return len - (buffer->cur - buffer->data);
775 /* It's possible that raw buffer has some leftovers from last conversion - some char
776 sequence that doesn't represent a full code point. Length argument should be calculated with
777 readerinput_get_convlen(), if it's -1 it will be calculated here. */
778 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
780 encoded_buffer *buffer = &readerinput->buffer->encoded;
782 if (len == -1)
783 len = readerinput_get_convlen(readerinput);
785 memmove(buffer->data, buffer->cur + (buffer->written - len), len);
786 /* everything below cur is lost too */
787 buffer->written -= len + (buffer->cur - buffer->data);
788 /* after this point we don't need cur pointer really,
789 it's used only to mark where actual data begins when first chunk is read */
790 buffer->cur = buffer->data;
793 /* note that raw buffer content is kept */
794 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
796 encoded_buffer *src = &readerinput->buffer->encoded;
797 encoded_buffer *dest = &readerinput->buffer->utf16;
798 int len, dest_len;
799 HRESULT hr;
800 WCHAR *ptr;
801 UINT cp;
803 hr = get_code_page(enc, &cp);
804 if (FAILED(hr)) return;
806 readerinput->buffer->code_page = cp;
807 len = readerinput_get_convlen(readerinput);
809 TRACE("switching to cp %d\n", cp);
811 /* just copy in this case */
812 if (enc == XmlEncoding_UTF16)
814 readerinput_grow(readerinput, len);
815 memcpy(dest->data, src->cur, len);
816 dest->written += len*sizeof(WCHAR);
817 return;
820 dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
821 readerinput_grow(readerinput, dest_len);
822 ptr = (WCHAR*)dest->data;
823 MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
824 ptr[dest_len] = 0;
825 dest->written += dest_len*sizeof(WCHAR);
828 /* shrinks parsed data a buffer begins with */
829 static void reader_shrink(xmlreader *reader)
831 encoded_buffer *buffer = &reader->input->buffer->utf16;
833 /* avoid to move too often using threshold shrink length */
834 if (buffer->cur - buffer->data > buffer->written / 2)
836 buffer->written -= buffer->cur - buffer->data;
837 memmove(buffer->data, buffer->cur, buffer->written);
838 buffer->cur = buffer->data;
839 *(WCHAR*)&buffer->cur[buffer->written] = 0;
843 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
844 It won't attempt to shrink but will grow destination buffer if needed */
845 static HRESULT reader_more(xmlreader *reader)
847 xmlreaderinput *readerinput = reader->input;
848 encoded_buffer *src = &readerinput->buffer->encoded;
849 encoded_buffer *dest = &readerinput->buffer->utf16;
850 UINT cp = readerinput->buffer->code_page;
851 int len, dest_len;
852 HRESULT hr;
853 WCHAR *ptr;
855 /* get some raw data from stream first */
856 hr = readerinput_growraw(readerinput);
857 len = readerinput_get_convlen(readerinput);
859 /* just copy for UTF-16 case */
860 if (cp == ~0)
862 readerinput_grow(readerinput, len);
863 memcpy(dest->data, src->cur, len);
864 dest->written += len*sizeof(WCHAR);
865 return hr;
868 dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
869 readerinput_grow(readerinput, dest_len);
870 ptr = (WCHAR*)dest->data;
871 MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
872 ptr[dest_len] = 0;
873 dest->written += dest_len*sizeof(WCHAR);
874 /* get rid of processed data */
875 readerinput_shrinkraw(readerinput, len);
877 return hr;
880 static inline WCHAR *reader_get_cur(xmlreader *reader)
882 WCHAR *ptr = (WCHAR*)reader->input->buffer->utf16.cur;
883 if (!*ptr) reader_more(reader);
884 return ptr;
887 static int reader_cmp(xmlreader *reader, const WCHAR *str)
889 const WCHAR *ptr = reader_get_cur(reader);
890 return strncmpW(str, ptr, strlenW(str));
893 /* moves cursor n WCHARs forward */
894 static void reader_skipn(xmlreader *reader, int n)
896 encoded_buffer *buffer = &reader->input->buffer->utf16;
897 const WCHAR *ptr = reader_get_cur(reader);
899 while (*ptr++ && n--)
901 buffer->cur += sizeof(WCHAR);
902 reader->pos++;
906 static inline int is_wchar_space(WCHAR ch)
908 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
911 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
912 static int reader_skipspaces(xmlreader *reader)
914 encoded_buffer *buffer = &reader->input->buffer->utf16;
915 const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
917 while (is_wchar_space(*ptr))
919 buffer->cur += sizeof(WCHAR);
920 if (*ptr == '\r')
921 reader->pos = 0;
922 else if (*ptr == '\n')
924 reader->line++;
925 reader->pos = 0;
927 else
928 reader->pos++;
929 ptr++;
932 return ptr - start;
935 /* [26] VersionNum ::= '1.' [0-9]+ */
936 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
938 WCHAR *ptr, *ptr2, *start = reader_get_cur(reader);
939 static const WCHAR onedotW[] = {'1','.',0};
941 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
942 /* skip "1." */
943 reader_skipn(reader, 2);
945 ptr2 = ptr = reader_get_cur(reader);
946 while (*ptr >= '0' && *ptr <= '9')
947 ptr++;
949 if (ptr2 == ptr) return WC_E_DIGIT;
950 TRACE("version=%s\n", debugstr_wn(start, ptr-start));
951 reader_init_strvalue(start, ptr-start, val);
952 reader_skipn(reader, ptr-ptr2);
953 return S_OK;
956 /* [25] Eq ::= S? '=' S? */
957 static HRESULT reader_parse_eq(xmlreader *reader)
959 static const WCHAR eqW[] = {'=',0};
960 reader_skipspaces(reader);
961 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
962 /* skip '=' */
963 reader_skipn(reader, 1);
964 reader_skipspaces(reader);
965 return S_OK;
968 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
969 static HRESULT reader_parse_versioninfo(xmlreader *reader)
971 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
972 strval val, name;
973 HRESULT hr;
975 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
977 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
978 reader_init_strvalue(reader_get_cur(reader), 7, &name);
979 /* skip 'version' */
980 reader_skipn(reader, 7);
982 hr = reader_parse_eq(reader);
983 if (FAILED(hr)) return hr;
985 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
986 return WC_E_QUOTE;
987 /* skip "'"|'"' */
988 reader_skipn(reader, 1);
990 hr = reader_parse_versionnum(reader, &val);
991 if (FAILED(hr)) return hr;
993 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
994 return WC_E_QUOTE;
996 /* skip "'"|'"' */
997 reader_skipn(reader, 1);
999 return reader_add_attr(reader, &name, &val);
1002 /* ([A-Za-z0-9._] | '-') */
1003 static inline int is_wchar_encname(WCHAR ch)
1005 return ((ch >= 'A' && ch <= 'Z') ||
1006 (ch >= 'a' && ch <= 'z') ||
1007 (ch >= '0' && ch <= '9') ||
1008 (ch == '.') || (ch == '_') ||
1009 (ch == '-'));
1012 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1013 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1015 WCHAR *start = reader_get_cur(reader), *ptr;
1016 xml_encoding enc;
1017 int len;
1019 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1020 return WC_E_ENCNAME;
1022 ptr = start;
1023 while (is_wchar_encname(*++ptr))
1026 len = ptr - start;
1027 enc = parse_encoding_name(start, len);
1028 TRACE("encoding name %s\n", debugstr_wn(start, len));
1029 val->str = start;
1030 val->len = len;
1032 if (enc == XmlEncoding_Unknown)
1033 return WC_E_ENCNAME;
1035 /* skip encoding name */
1036 reader_skipn(reader, len);
1037 return S_OK;
1040 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1041 static HRESULT reader_parse_encdecl(xmlreader *reader)
1043 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1044 strval name, val;
1045 HRESULT hr;
1047 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1049 if (reader_cmp(reader, encodingW)) return S_FALSE;
1050 name.str = reader_get_cur(reader);
1051 name.len = 8;
1052 /* skip 'encoding' */
1053 reader_skipn(reader, 8);
1055 hr = reader_parse_eq(reader);
1056 if (FAILED(hr)) return hr;
1058 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1059 return WC_E_QUOTE;
1060 /* skip "'"|'"' */
1061 reader_skipn(reader, 1);
1063 hr = reader_parse_encname(reader, &val);
1064 if (FAILED(hr)) return hr;
1066 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1067 return WC_E_QUOTE;
1069 /* skip "'"|'"' */
1070 reader_skipn(reader, 1);
1072 return reader_add_attr(reader, &name, &val);
1075 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1076 static HRESULT reader_parse_sddecl(xmlreader *reader)
1078 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1079 static const WCHAR yesW[] = {'y','e','s',0};
1080 static const WCHAR noW[] = {'n','o',0};
1081 WCHAR *start, *ptr;
1082 strval name, val;
1083 HRESULT hr;
1085 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1087 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1088 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1089 /* skip 'standalone' */
1090 reader_skipn(reader, 10);
1092 hr = reader_parse_eq(reader);
1093 if (FAILED(hr)) return hr;
1095 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1096 return WC_E_QUOTE;
1097 /* skip "'"|'"' */
1098 reader_skipn(reader, 1);
1100 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1101 return WC_E_XMLDECL;
1103 start = reader_get_cur(reader);
1104 /* skip 'yes'|'no' */
1105 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1106 ptr = reader_get_cur(reader);
1107 TRACE("standalone=%s\n", debugstr_wn(start, ptr-start));
1108 val.str = val.start = start;
1109 val.len = ptr-start;
1111 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1112 return WC_E_QUOTE;
1113 /* skip "'"|'"' */
1114 reader_skipn(reader, 1);
1116 return reader_add_attr(reader, &name, &val);
1119 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1120 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1122 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1123 static const WCHAR declcloseW[] = {'?','>',0};
1124 HRESULT hr;
1126 /* check if we have "<?xml " */
1127 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1129 reader_skipn(reader, 5);
1130 hr = reader_parse_versioninfo(reader);
1131 if (FAILED(hr))
1132 return hr;
1134 hr = reader_parse_encdecl(reader);
1135 if (FAILED(hr))
1136 return hr;
1138 hr = reader_parse_sddecl(reader);
1139 if (FAILED(hr))
1140 return hr;
1142 reader_skipspaces(reader);
1143 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1144 reader_skipn(reader, 2);
1146 reader_inc_depth(reader);
1147 reader->nodetype = XmlNodeType_XmlDeclaration;
1148 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1149 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1150 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1152 return S_OK;
1155 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1156 static HRESULT reader_parse_comment(xmlreader *reader)
1158 WCHAR *start, *ptr;
1160 if (reader->resume[XmlReadResume_Body])
1162 start = reader->resume[XmlReadResume_Body];
1163 ptr = reader_get_cur(reader);
1165 else
1167 /* skip '<!--' */
1168 reader_skipn(reader, 4);
1169 reader_shrink(reader);
1170 ptr = start = reader_get_cur(reader);
1171 reader->nodetype = XmlNodeType_Comment;
1172 reader->resume[XmlReadResume_Body] = start;
1173 reader->resumestate = XmlReadResumeState_Comment;
1174 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1175 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1176 reader_set_strvalue(reader, StringValue_Value, NULL);
1179 /* will exit when there's no more data, it won't attempt to
1180 read more from stream */
1181 while (*ptr)
1183 if (ptr[0] == '-')
1185 if (ptr[1] == '-')
1187 if (ptr[2] == '>')
1189 strval value;
1191 TRACE("%s\n", debugstr_wn(start, ptr-start));
1192 /* skip '-->' */
1193 reader_skipn(reader, 3);
1194 reader_init_strvalue(start, ptr-start, &value);
1195 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1196 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1197 reader_set_strvalue(reader, StringValue_Value, &value);
1198 reader->resume[XmlReadResume_Body] = NULL;
1199 reader->resumestate = XmlReadResumeState_Initial;
1200 return S_OK;
1202 else
1203 return WC_E_COMMENT;
1205 else
1206 ptr++;
1208 else
1210 reader_skipn(reader, 1);
1211 ptr++;
1215 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
/* Tests a single UTF-16 code unit. High (0xd800-0xdbff) and low
   (0xdc00-0xdfff) surrogates are accepted individually, which makes the
   accepted set one contiguous run from 0x20 to 0xfffd plus the three
   whitespace control characters. */
static inline int is_char(WCHAR ch)
{
    if (ch == '\t' || ch == '\n' || ch == '\r')
        return 1;

    /* 0x20-0xd7ff, surrogates 0xd800-0xdfff, 0xe000-0xfffd */
    return ch >= 0x20 && ch <= 0xfffd;
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/* Returns non-zero when ch is allowed inside a public identifier literal.
   Both quote characters are handled by the caller: '"' is never a PubidChar,
   and "'" is one, so a literal delimited by "'" has to stop on it itself. */
static inline int is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* The range has to start at the apostrophe (0x27), not at '-',
              to cover every character listed here. */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* Returns 1 when the UTF-16 code unit ch may start a Name, 0 otherwise.
   Surrogate code units are accepted individually. */
static inline int is_namestartchar(WCHAR ch)
{
    /* inclusive [first, last] ranges of accepted code units */
    static const struct { WCHAR first, last; } ranges[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd }
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            return 1;
    }

    return 0;
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
/* Returns 1 when the UTF-16 code unit ch may appear in an NCName, that is
   any name character except ':', 0 otherwise. Surrogate code units are
   accepted individually. */
static inline int is_ncnamechar(WCHAR ch)
{
    /* inclusive [first, last] ranges of accepted code units */
    static const struct { WCHAR first, last; } ranges[] =
    {
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { '-',    '-'    },
        { '.',    '.'    },
        { '0',    '9'    },
        { 0xb7,   0xb7   },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x300,  0x36f  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd }
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            return 1;
    }

    return 0;
}
1286 static inline int is_namechar(WCHAR ch)
1288 return (ch == ':') || is_ncnamechar(ch);
1291 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1293 /* When we're on attribute always return attribute type, container node type is kept.
1294 Note that container is not necessarily an element, and attribute doesn't mean it's
1295 an attribute in XML spec terms. */
1296 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1299 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1300 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1301 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1302 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1303 [5] Name ::= NameStartChar (NameChar)* */
1304 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1306 WCHAR *ptr, *start;
1308 if (reader->resume[XmlReadResume_Name])
1310 start = reader->resume[XmlReadResume_Name];
1311 ptr = reader_get_cur(reader);
1313 else
1315 ptr = start = reader_get_cur(reader);
1316 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1319 while (is_namechar(*ptr))
1321 reader_skipn(reader, 1);
1322 ptr = reader_get_cur(reader);
1325 if (is_reader_pending(reader))
1327 reader->resume[XmlReadResume_Name] = start;
1328 return E_PENDING;
1330 else
1331 reader->resume[XmlReadResume_Name] = NULL;
1333 TRACE("name %s:%d\n", debugstr_wn(start, ptr-start), (int)(ptr-start));
1334 reader_init_strvalue(start, ptr-start, name);
1336 return S_OK;
1339 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1340 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1342 static const WCHAR xmlW[] = {'x','m','l'};
1343 strval name;
1344 HRESULT hr;
1345 UINT i;
1347 hr = reader_parse_name(reader, &name);
1348 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1350 /* now that we got name check for illegal content */
1351 if (name.len == 3 && !strncmpiW(name.str, xmlW, 3))
1352 return WC_E_LEADINGXML;
1354 /* PITarget can't be a qualified name */
1355 for (i = 0; i < name.len; i++)
1356 if (name.str[i] == ':')
1357 return i ? NC_E_NAMECOLON : WC_E_PI;
1359 TRACE("pitarget %s:%d\n", debugstr_wn(name.str, name.len), name.len);
1360 *target = name;
1361 return S_OK;
1364 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1365 static HRESULT reader_parse_pi(xmlreader *reader)
1367 WCHAR *ptr, *start;
1368 strval target;
1369 HRESULT hr;
1371 switch (reader->resumestate)
1373 case XmlReadResumeState_Initial:
1374 /* skip '<?' */
1375 reader_skipn(reader, 2);
1376 reader_shrink(reader);
1377 reader->resumestate = XmlReadResumeState_PITarget;
1378 case XmlReadResumeState_PITarget:
1379 hr = reader_parse_pitarget(reader, &target);
1380 if (FAILED(hr)) return hr;
1381 reader_set_strvalue(reader, StringValue_LocalName, &target);
1382 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1383 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1384 reader->resumestate = XmlReadResumeState_PIBody;
1385 default:
1389 ptr = reader_get_cur(reader);
1390 /* exit earlier if there's no content */
1391 if (ptr[0] == '?' && ptr[1] == '>')
1393 /* skip '?>' */
1394 reader_skipn(reader, 2);
1395 reader->nodetype = XmlNodeType_ProcessingInstruction;
1396 reader->resumestate = XmlReadResumeState_Initial;
1397 return S_OK;
1400 if (!reader->resume[XmlReadResume_Body])
1402 /* now at least a single space char should be there */
1403 if (!is_wchar_space(*ptr)) return WC_E_WHITESPACE;
1404 reader_skipspaces(reader);
1405 ptr = start = reader_get_cur(reader);
1406 reader->resume[XmlReadResume_Body] = start;
1408 else
1410 start = reader->resume[XmlReadResume_Body];
1411 ptr = reader_get_cur(reader);
1414 while (*ptr)
1416 if (ptr[0] == '?')
1418 if (ptr[1] == '>')
1420 strval value;
1422 TRACE("%s\n", debugstr_wn(start, ptr-start));
1423 /* skip '?>' */
1424 reader_skipn(reader, 2);
1425 reader->nodetype = XmlNodeType_ProcessingInstruction;
1426 reader->resumestate = XmlReadResumeState_Initial;
1427 reader->resume[XmlReadResume_Body] = NULL;
1428 reader_init_strvalue(start, ptr-start, &value);
1429 reader_set_strvalue(reader, StringValue_Value, &value);
1430 return S_OK;
1432 else
1434 ptr++;
1435 reader_more(reader);
1438 else
1440 reader_skipn(reader, 1);
1441 ptr = reader_get_cur(reader);
1445 return S_OK;
1448 /* This one is used to parse significant whitespace nodes, like in Misc production */
1449 static HRESULT reader_parse_whitespace(xmlreader *reader)
1451 WCHAR *start, *ptr;
1453 reader_shrink(reader);
1454 start = reader_get_cur(reader);
1456 reader_skipspaces(reader);
1457 ptr = reader_get_cur(reader);
1458 TRACE("%s\n", debugstr_wn(start, ptr-start));
1460 reader->nodetype = XmlNodeType_Whitespace;
1461 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1462 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1463 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1464 return S_OK;
1467 /* [27] Misc ::= Comment | PI | S */
1468 static HRESULT reader_parse_misc(xmlreader *reader)
1470 HRESULT hr = S_FALSE;
1472 if (reader->resumestate != XmlReadResumeState_Initial)
1474 hr = reader_more(reader);
1475 if (FAILED(hr)) return hr;
1477 /* finish current node */
1478 switch (reader->resumestate)
1480 case XmlReadResumeState_PITarget:
1481 case XmlReadResumeState_PIBody:
1482 return reader_parse_pi(reader);
1483 case XmlReadResumeState_Comment:
1484 return reader_parse_comment(reader);
1485 default:
1486 ERR("unknown resume state %d\n", reader->resumestate);
1490 while (1)
1492 const WCHAR *cur = reader_get_cur(reader);
1494 if (is_wchar_space(*cur))
1495 hr = reader_parse_whitespace(reader);
1496 else if (!reader_cmp(reader, commentW))
1497 hr = reader_parse_comment(reader);
1498 else if (!reader_cmp(reader, piW))
1499 hr = reader_parse_pi(reader);
1500 else
1501 break;
1503 if (hr != S_FALSE) return hr;
1506 return hr;
1509 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1510 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1512 WCHAR *start = reader_get_cur(reader), *cur, quote;
1514 if (*start != '"' && *start != '\'') return WC_E_QUOTE;
1516 quote = *start;
1517 reader_skipn(reader, 1);
1519 cur = start = reader_get_cur(reader);
1520 while (is_char(*cur) && *cur != quote)
1522 reader_skipn(reader, 1);
1523 cur = reader_get_cur(reader);
1525 if (*cur == quote) reader_skipn(reader, 1);
1527 literal->str = start;
1528 literal->len = cur-start;
1529 TRACE("%s\n", debugstr_wn(start, cur-start));
1530 return S_OK;
1533 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1534 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1535 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1537 WCHAR *start = reader_get_cur(reader), *cur, quote;
1539 if (*start != '"' && *start != '\'') return WC_E_QUOTE;
1541 quote = *start;
1542 reader_skipn(reader, 1);
1544 cur = start;
1545 while (is_pubchar(*cur) && *cur != quote)
1547 reader_skipn(reader, 1);
1548 cur = reader_get_cur(reader);
1551 reader_init_strvalue(start, cur-start, literal);
1552 TRACE("%s\n", debugstr_wn(start, cur-start));
1553 return S_OK;
1556 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1557 static HRESULT reader_parse_externalid(xmlreader *reader)
1559 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1560 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1561 strval name;
1562 HRESULT hr;
1563 int cnt;
1565 if (reader_cmp(reader, systemW))
1567 if (reader_cmp(reader, publicW))
1568 return S_FALSE;
1569 else
1571 strval pub;
1573 /* public id */
1574 reader_skipn(reader, 6);
1575 cnt = reader_skipspaces(reader);
1576 if (!cnt) return WC_E_WHITESPACE;
1578 hr = reader_parse_pub_literal(reader, &pub);
1579 if (FAILED(hr)) return hr;
1581 reader_init_strvalue(publicW, strlenW(publicW), &name);
1582 return reader_add_attr(reader, &name, &pub);
1585 else
1587 strval sys;
1589 /* system id */
1590 reader_skipn(reader, 6);
1591 cnt = reader_skipspaces(reader);
1592 if (!cnt) return WC_E_WHITESPACE;
1594 hr = reader_parse_sys_literal(reader, &sys);
1595 if (FAILED(hr)) return hr;
1597 reader_init_strvalue(systemW, strlenW(systemW), &name);
1598 return reader_add_attr(reader, &name, &sys);
1601 return hr;
1604 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1605 static HRESULT reader_parse_dtd(xmlreader *reader)
1607 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1608 strval name;
1609 WCHAR *cur;
1610 HRESULT hr;
1612 /* check if we have "<!DOCTYPE" */
1613 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1614 reader_shrink(reader);
1616 /* DTD processing is not allowed by default */
1617 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1619 reader_skipn(reader, 9);
1620 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1622 /* name */
1623 hr = reader_parse_name(reader, &name);
1624 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1626 reader_skipspaces(reader);
1628 hr = reader_parse_externalid(reader);
1629 if (FAILED(hr)) return hr;
1631 reader_skipspaces(reader);
1633 cur = reader_get_cur(reader);
1634 if (*cur != '>')
1636 FIXME("internal subset parsing not implemented\n");
1637 return E_NOTIMPL;
1640 /* skip '>' */
1641 reader_skipn(reader, 1);
1643 reader->nodetype = XmlNodeType_DocumentType;
1644 reader_set_strvalue(reader, StringValue_LocalName, &name);
1645 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1647 return S_OK;
1650 /* [11 NS] LocalPart ::= NCName */
1651 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1653 WCHAR *ptr, *start;
1655 if (reader->resume[XmlReadResume_Local])
1657 start = reader->resume[XmlReadResume_Local];
1658 ptr = reader_get_cur(reader);
1660 else
1662 ptr = start = reader_get_cur(reader);
1665 while (is_ncnamechar(*ptr))
1667 reader_skipn(reader, 1);
1668 ptr = reader_get_cur(reader);
1671 if (is_reader_pending(reader))
1673 reader->resume[XmlReadResume_Local] = start;
1674 return E_PENDING;
1676 else
1677 reader->resume[XmlReadResume_Local] = NULL;
1679 reader_init_strvalue(start, ptr-start, local);
1681 return S_OK;
1684 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1685 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1686 [9 NS] UnprefixedName ::= LocalPart
1687 [10 NS] Prefix ::= NCName */
1688 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1690 WCHAR *ptr, *start;
1691 HRESULT hr;
1693 if (reader->resume[XmlReadResume_Name])
1695 start = reader->resume[XmlReadResume_Name];
1696 ptr = reader_get_cur(reader);
1698 else
1700 ptr = start = reader_get_cur(reader);
1701 reader->resume[XmlReadResume_Name] = start;
1702 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1705 if (reader->resume[XmlReadResume_Local])
1707 hr = reader_parse_local(reader, local);
1708 if (FAILED(hr)) return hr;
1710 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1711 local->start - reader->resume[XmlReadResume_Name] - 1,
1712 prefix);
1714 else
1716 /* skip prefix part */
1717 while (is_ncnamechar(*ptr))
1719 reader_skipn(reader, 1);
1720 ptr = reader_get_cur(reader);
1723 if (is_reader_pending(reader)) return E_PENDING;
1725 /* got a qualified name */
1726 if (*ptr == ':')
1728 reader_init_strvalue(start, ptr-start, prefix);
1730 /* skip ':' */
1731 reader_skipn(reader, 1);
1732 hr = reader_parse_local(reader, local);
1733 if (FAILED(hr)) return hr;
1735 else
1737 reader_init_strvalue(reader->resume[XmlReadResume_Name], ptr-reader->resume[XmlReadResume_Name], local);
1738 reader_init_strvalue(NULL, 0, prefix);
1742 reader_init_strvalue(start, ptr-start, local);
1744 if (prefix->len)
1745 TRACE("qname %s:%s\n", debugstr_wn(prefix->start, prefix->len), debugstr_wn(local->start, local->len));
1746 else
1747 TRACE("ncname %s\n", debugstr_wn(local->start, local->len));
1749 reader_init_strvalue(prefix->start ? prefix->start : local->start,
1750 /* count ':' too */
1751 (prefix->len ? prefix->len + 1 : 0) + local->len,
1752 qname);
1754 reader->resume[XmlReadResume_Name] = NULL;
1755 reader->resume[XmlReadResume_Local] = NULL;
1757 return S_OK;
1760 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
1761 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
1762 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
1764 static const WCHAR endW[] = {'/','>',0};
1765 HRESULT hr;
1767 hr = reader_parse_qname(reader, prefix, local, qname);
1768 if (FAILED(hr)) return hr;
1770 reader_skipspaces(reader);
1772 /* empty element */
1773 if ((*empty = !reader_cmp(reader, endW)))
1775 /* skip '/>' */
1776 reader_skipn(reader, 2);
1777 reader->empty_element = TRUE;
1778 return S_OK;
1781 /* got a start tag */
1782 if (!reader_cmp(reader, gtW))
1784 /* skip '>' */
1785 reader_skipn(reader, 1);
1786 return reader_push_element(reader, qname);
1789 FIXME("only empty elements/start tags without attribute list supported\n");
1790 return E_NOTIMPL;
1793 /* [39] element ::= EmptyElemTag | STag content ETag */
1794 static HRESULT reader_parse_element(xmlreader *reader)
1796 HRESULT hr;
1798 switch (reader->resumestate)
1800 case XmlReadResumeState_Initial:
1801 /* check if we are really on element */
1802 if (reader_cmp(reader, ltW)) return S_FALSE;
1804 /* skip '<' */
1805 reader_skipn(reader, 1);
1807 reader_shrink(reader);
1808 reader->resumestate = XmlReadResumeState_STag;
1809 case XmlReadResumeState_STag:
1811 strval qname, prefix, local;
1812 int empty = 0;
1814 /* this handles empty elements too */
1815 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
1816 if (FAILED(hr)) return hr;
1818 /* FIXME: need to check for defined namespace to reject invalid prefix,
1819 currently reject all prefixes */
1820 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
1822 /* if we got empty element and stack is empty go straight to Misc */
1823 if (empty && list_empty(&reader->elements))
1824 reader->instate = XmlReadInState_MiscEnd;
1825 else
1826 reader->instate = XmlReadInState_Content;
1828 reader->nodetype = XmlNodeType_Element;
1829 reader->resumestate = XmlReadResumeState_Initial;
1830 reader_set_strvalue(reader, StringValue_LocalName, &local);
1831 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
1832 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
1833 break;
1835 default:
1836 hr = E_FAIL;
1839 return hr;
1842 /* [13 NS] ETag ::= '</' QName S? '>' */
1843 static HRESULT reader_parse_endtag(xmlreader *reader)
1845 strval prefix, local, qname;
1846 struct element *elem;
1847 HRESULT hr;
1849 /* skip '</' */
1850 reader_skipn(reader, 2);
1852 hr = reader_parse_qname(reader, &prefix, &local, &qname);
1853 if (FAILED(hr)) return hr;
1855 reader_skipspaces(reader);
1857 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
1859 /* skip '>' */
1860 reader_skipn(reader, 1);
1862 /* Element stack should never be empty at this point, cause we shouldn't get to
1863 content parsing if it's empty. */
1864 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
1865 if (!strval_eq(&elem->qname, &qname)) return WC_E_ELEMENTMATCH;
1867 reader_pop_element(reader);
1869 /* It was a root element, the rest is expected as Misc */
1870 if (list_empty(&reader->elements))
1871 reader->instate = XmlReadInState_MiscEnd;
1873 reader->nodetype = XmlNodeType_EndElement;
1874 reader_set_strvalue(reader, StringValue_LocalName, &local);
1875 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
1877 return S_OK;
1880 /* [18] CDSect ::= CDStart CData CDEnd
1881 [19] CDStart ::= '<![CDATA['
1882 [20] CData ::= (Char* - (Char* ']]>' Char*))
1883 [21] CDEnd ::= ']]>' */
1884 static HRESULT reader_parse_cdata(xmlreader *reader)
1886 WCHAR *start, *ptr;
1888 if (reader->resume[XmlReadResume_Body])
1890 start = reader->resume[XmlReadResume_Body];
1891 ptr = reader_get_cur(reader);
1893 else
1895 /* skip markup '<![CDATA[' */
1896 reader_skipn(reader, 9);
1897 reader_shrink(reader);
1898 ptr = start = reader_get_cur(reader);
1899 reader->nodetype = XmlNodeType_CDATA;
1900 reader->resume[XmlReadResume_Body] = start;
1901 reader->resumestate = XmlReadResumeState_CDATA;
1902 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1903 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1904 reader_set_strvalue(reader, StringValue_Value, NULL);
1907 while (*ptr)
1909 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
1911 strval value;
1913 TRACE("%s\n", debugstr_wn(start, ptr-start));
1914 /* skip ']]>' */
1915 reader_skipn(reader, 3);
1916 reader_init_strvalue(start, ptr-start, &value);
1917 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1918 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1919 reader_set_strvalue(reader, StringValue_Value, &value);
1920 reader->resume[XmlReadResume_Body] = NULL;
1921 reader->resumestate = XmlReadResumeState_Initial;
1922 return S_OK;
1924 else
1926 /* Value normalization is not fully implemented, rules are:
1928 - single '\r' -> '\n';
1929 - sequence '\r\n' -> '\n', in this case value length changes;
1931 if (*ptr == '\r') *ptr = '\n';
1932 reader_skipn(reader, 1);
1933 ptr++;
1937 return S_OK;
1940 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1941 [67] Reference ::= EntityRef | CharRef
1942 [68] EntityRef ::= '&' Name ';' */
1943 static HRESULT reader_parse_reference(xmlreader *reader)
1945 FIXME("References not supported\n");
1946 return E_NOTIMPL;
1949 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
1950 static HRESULT reader_parse_chardata(xmlreader *reader)
1952 WCHAR *start, *ptr;
1954 if (reader->resume[XmlReadResume_Body])
1956 start = reader->resume[XmlReadResume_Body];
1957 ptr = reader_get_cur(reader);
1959 else
1961 reader_shrink(reader);
1962 ptr = start = reader_get_cur(reader);
1963 /* There's no text */
1964 if (!*ptr || *ptr == '<') return S_OK;
1965 reader->nodetype = XmlNodeType_Text;
1966 reader->resume[XmlReadResume_Body] = start;
1967 reader->resumestate = XmlReadResumeState_CharData;
1968 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1969 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1970 reader_set_strvalue(reader, StringValue_Value, NULL);
1973 while (*ptr)
1975 /* CDATA closing sequence ']]>' is not allowed */
1976 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
1977 return WC_E_CDSECTEND;
1979 /* Found next markup part */
1980 if (ptr[0] == '<')
1982 strval value;
1984 reader_init_strvalue(start, ptr-start, &value);
1985 reader_set_strvalue(reader, StringValue_Value, &value);
1986 return S_OK;
1989 reader_skipn(reader, 1);
1990 ptr++;
1993 return S_OK;
1996 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
1997 static HRESULT reader_parse_content(xmlreader *reader)
1999 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2000 static const WCHAR etagW[] = {'<','/',0};
2001 static const WCHAR ampW[] = {'&',0};
2003 if (reader->resumestate != XmlReadResumeState_Initial)
2005 switch (reader->resumestate)
2007 case XmlReadResumeState_CDATA:
2008 return reader_parse_cdata(reader);
2009 case XmlReadResumeState_Comment:
2010 return reader_parse_comment(reader);
2011 case XmlReadResumeState_PIBody:
2012 case XmlReadResumeState_PITarget:
2013 return reader_parse_pi(reader);
2014 case XmlReadResumeState_CharData:
2015 return reader_parse_chardata(reader);
2016 default:
2017 ERR("unknown resume state %d\n", reader->resumestate);
2021 reader_shrink(reader);
2023 /* handle end tag here, it indicates end of content as well */
2024 if (!reader_cmp(reader, etagW))
2025 return reader_parse_endtag(reader);
2027 if (!reader_cmp(reader, commentW))
2028 return reader_parse_comment(reader);
2030 if (!reader_cmp(reader, piW))
2031 return reader_parse_pi(reader);
2033 if (!reader_cmp(reader, cdstartW))
2034 return reader_parse_cdata(reader);
2036 if (!reader_cmp(reader, ampW))
2037 return reader_parse_reference(reader);
2039 if (!reader_cmp(reader, ltW))
2040 return reader_parse_element(reader);
2042 /* what's left must be CharData */
2043 return reader_parse_chardata(reader);
2046 static HRESULT reader_parse_nextnode(xmlreader *reader)
2048 HRESULT hr;
2050 if (!is_reader_pending(reader))
2051 reader_clear_attrs(reader);
2053 while (1)
2055 switch (reader->instate)
2057 /* if it's a first call for a new input we need to detect stream encoding */
2058 case XmlReadInState_Initial:
2060 xml_encoding enc;
2062 hr = readerinput_growraw(reader->input);
2063 if (FAILED(hr)) return hr;
2065 /* try to detect encoding by BOM or data and set input code page */
2066 hr = readerinput_detectencoding(reader->input, &enc);
2067 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2068 if (FAILED(hr)) return hr;
2070 /* always switch first time cause we have to put something in */
2071 readerinput_switchencoding(reader->input, enc);
2073 /* parse xml declaration */
2074 hr = reader_parse_xmldecl(reader);
2075 if (FAILED(hr)) return hr;
2077 readerinput_shrinkraw(reader->input, -1);
2078 reader->instate = XmlReadInState_Misc_DTD;
2079 if (hr == S_OK) return hr;
2081 break;
2082 case XmlReadInState_Misc_DTD:
2083 hr = reader_parse_misc(reader);
2084 if (FAILED(hr)) return hr;
2086 if (hr == S_FALSE)
2087 reader->instate = XmlReadInState_DTD;
2088 else
2089 return hr;
2090 break;
2091 case XmlReadInState_DTD:
2092 hr = reader_parse_dtd(reader);
2093 if (FAILED(hr)) return hr;
2095 if (hr == S_OK)
2097 reader->instate = XmlReadInState_DTD_Misc;
2098 return hr;
2100 else
2101 reader->instate = XmlReadInState_Element;
2102 break;
2103 case XmlReadInState_DTD_Misc:
2104 hr = reader_parse_misc(reader);
2105 if (FAILED(hr)) return hr;
2107 if (hr == S_FALSE)
2108 reader->instate = XmlReadInState_Element;
2109 else
2110 return hr;
2111 break;
2112 case XmlReadInState_Element:
2113 return reader_parse_element(reader);
2114 case XmlReadInState_Content:
2115 return reader_parse_content(reader);
2116 case XmlReadInState_MiscEnd:
2117 hr = reader_parse_misc(reader);
2118 if (FAILED(hr)) return hr;
2120 if (hr == S_FALSE)
2121 reader->instate = XmlReadInState_Eof;
2122 return hr;
2123 case XmlReadInState_Eof:
2124 return S_FALSE;
2125 default:
2126 FIXME("internal state %d not handled\n", reader->instate);
2127 return E_NOTIMPL;
2131 return E_NOTIMPL;
2134 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2136 xmlreader *This = impl_from_IXmlReader(iface);
2138 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2140 if (IsEqualGUID(riid, &IID_IUnknown) ||
2141 IsEqualGUID(riid, &IID_IXmlReader))
2143 *ppvObject = iface;
2145 else
2147 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2148 *ppvObject = NULL;
2149 return E_NOINTERFACE;
2152 IXmlReader_AddRef(iface);
2154 return S_OK;
2157 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2159 xmlreader *This = impl_from_IXmlReader(iface);
2160 ULONG ref = InterlockedIncrement(&This->ref);
2161 TRACE("(%p)->(%d)\n", This, ref);
2162 return ref;
2165 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2167 xmlreader *This = impl_from_IXmlReader(iface);
2168 LONG ref = InterlockedDecrement(&This->ref);
2170 TRACE("(%p)->(%d)\n", This, ref);
2172 if (ref == 0)
2174 IMalloc *imalloc = This->imalloc;
2175 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2176 reader_clear_attrs(This);
2177 reader_clear_elements(This);
2178 reader_free_strvalues(This);
2179 reader_free(This, This);
2180 if (imalloc) IMalloc_Release(imalloc);
2183 return ref;
2186 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2188 xmlreader *This = impl_from_IXmlReader(iface);
2189 IXmlReaderInput *readerinput;
2190 HRESULT hr;
2192 TRACE("(%p)->(%p)\n", This, input);
2194 if (This->input)
2196 readerinput_release_stream(This->input);
2197 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2198 This->input = NULL;
2201 This->line = This->pos = 0;
2202 reader_clear_elements(This);
2203 This->depth = 0;
2204 This->resumestate = XmlReadResumeState_Initial;
2205 memset(This->resume, 0, sizeof(This->resume));
2207 /* just reset current input */
2208 if (!input)
2210 This->state = XmlReadState_Initial;
2211 return S_OK;
2214 /* now try IXmlReaderInput, ISequentialStream, IStream */
2215 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2216 if (hr == S_OK)
2218 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2219 This->input = impl_from_IXmlReaderInput(readerinput);
2220 else
2222 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2223 readerinput, readerinput->lpVtbl);
2224 IUnknown_Release(readerinput);
2225 return E_FAIL;
2230 if (hr != S_OK || !readerinput)
2232 /* create IXmlReaderInput basing on supplied interface */
2233 hr = CreateXmlReaderInputWithEncodingName(input,
2234 NULL, NULL, FALSE, NULL, &readerinput);
2235 if (hr != S_OK) return hr;
2236 This->input = impl_from_IXmlReaderInput(readerinput);
2239 /* set stream for supplied IXmlReaderInput */
2240 hr = readerinput_query_for_stream(This->input);
2241 if (hr == S_OK)
2243 This->state = XmlReadState_Initial;
2244 This->instate = XmlReadInState_Initial;
2247 return hr;
2250 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2252 xmlreader *This = impl_from_IXmlReader(iface);
2254 TRACE("(%p)->(%s %p)\n", This, debugstr_prop(property), value);
2256 if (!value) return E_INVALIDARG;
2258 switch (property)
2260 case XmlReaderProperty_DtdProcessing:
2261 *value = This->dtdmode;
2262 break;
2263 case XmlReaderProperty_ReadState:
2264 *value = This->state;
2265 break;
2266 default:
2267 FIXME("Unimplemented property (%u)\n", property);
2268 return E_NOTIMPL;
2271 return S_OK;
2274 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2276 xmlreader *This = impl_from_IXmlReader(iface);
2278 TRACE("(%p)->(%s %lu)\n", This, debugstr_prop(property), value);
2280 switch (property)
2282 case XmlReaderProperty_DtdProcessing:
2283 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2284 This->dtdmode = value;
2285 break;
2286 default:
2287 FIXME("Unimplemented property (%u)\n", property);
2288 return E_NOTIMPL;
2291 return S_OK;
2294 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2296 xmlreader *This = impl_from_IXmlReader(iface);
2297 XmlNodeType oldtype = This->nodetype;
2298 HRESULT hr;
2300 TRACE("(%p)->(%p)\n", This, nodetype);
2302 if (This->state == XmlReadState_Closed) return S_FALSE;
2304 hr = reader_parse_nextnode(This);
2305 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2306 This->state = XmlReadState_Interactive;
2307 if (hr == S_OK)
2309 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2310 *nodetype = This->nodetype;
2313 return hr;
2316 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2318 xmlreader *This = impl_from_IXmlReader(iface);
2319 TRACE("(%p)->(%p)\n", This, node_type);
2321 *node_type = reader_get_nodetype(This);
2322 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2325 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2327 xmlreader *This = impl_from_IXmlReader(iface);
2329 TRACE("(%p)\n", This);
2331 if (!This->attr_count) return S_FALSE;
2332 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2333 return S_OK;
2336 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2338 xmlreader *This = impl_from_IXmlReader(iface);
2339 const struct list *next;
2341 TRACE("(%p)\n", This);
2343 if (!This->attr_count) return S_FALSE;
2345 if (!This->attr)
2346 return IXmlReader_MoveToFirstAttribute(iface);
2348 next = list_next(&This->attrs, &This->attr->entry);
2349 if (next)
2350 This->attr = LIST_ENTRY(next, struct attribute, entry);
2352 return next ? S_OK : S_FALSE;
2355 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2356 LPCWSTR local_name,
2357 LPCWSTR namespaceUri)
2359 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2360 return E_NOTIMPL;
2363 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2365 xmlreader *This = impl_from_IXmlReader(iface);
2367 TRACE("(%p)\n", This);
2369 if (!This->attr_count) return S_FALSE;
2370 This->attr = NULL;
2371 return S_OK;
2374 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2376 xmlreader *This = impl_from_IXmlReader(iface);
2378 TRACE("(%p)->(%p %p)\n", This, name, len);
2379 *name = This->strvalues[StringValue_QualifiedName].str;
2380 *len = This->strvalues[StringValue_QualifiedName].len;
2381 return S_OK;
2384 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2385 LPCWSTR *namespaceUri,
2386 UINT *namespaceUri_length)
2388 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2389 return E_NOTIMPL;
2392 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2394 xmlreader *This = impl_from_IXmlReader(iface);
2396 TRACE("(%p)->(%p %p)\n", This, name, len);
2397 *name = This->strvalues[StringValue_LocalName].str;
2398 if (len) *len = This->strvalues[StringValue_LocalName].len;
2399 return S_OK;
2402 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2404 xmlreader *This = impl_from_IXmlReader(iface);
2406 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2407 *prefix = This->strvalues[StringValue_Prefix].str;
2408 if (len) *len = This->strvalues[StringValue_Prefix].len;
2409 return S_OK;
2412 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2414 xmlreader *reader = impl_from_IXmlReader(iface);
2415 strval *val = &reader->strvalues[StringValue_Value];
2417 TRACE("(%p)->(%p %p)\n", reader, value, len);
2419 *value = NULL;
2421 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2423 XmlNodeType type;
2424 HRESULT hr;
2426 hr = IXmlReader_Read(iface, &type);
2427 if (FAILED(hr)) return hr;
2429 /* return if still pending, partially read values are not reported */
2430 if (is_reader_pending(reader)) return E_PENDING;
2433 if (!val->str)
2435 val->str = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2436 if (!val->str) return E_OUTOFMEMORY;
2437 memcpy(val->str, val->start, val->len*sizeof(WCHAR));
2438 val->str[val->len] = 0;
2441 *value = val->str;
2442 if (len) *len = val->len;
2443 return S_OK;
2446 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2448 xmlreader *reader = impl_from_IXmlReader(iface);
2449 strval *val = &reader->strvalues[StringValue_Value];
2450 UINT len;
2452 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2454 /* Value is already allocated, chunked reads are not possible. */
2455 if (val->str) return S_FALSE;
2457 if (val->len)
2459 len = min(chunk_size, val->len);
2460 memcpy(buffer, val->start, len);
2461 val->start += len;
2462 val->len -= len;
2463 if (read) *read = len;
2466 return S_OK;
2469 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2470 LPCWSTR *baseUri,
2471 UINT *baseUri_length)
2473 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2474 return E_NOTIMPL;
2477 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2479 FIXME("(%p): stub\n", iface);
2480 return FALSE;
2483 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2485 xmlreader *This = impl_from_IXmlReader(iface);
2486 TRACE("(%p)\n", This);
2487 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2488 when current node is start tag of an element */
2489 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->empty_element : FALSE;
2492 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2494 xmlreader *This = impl_from_IXmlReader(iface);
2496 TRACE("(%p %p)\n", This, lineNumber);
2498 if (!lineNumber) return E_INVALIDARG;
2500 *lineNumber = This->line;
2502 return S_OK;
2505 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2507 xmlreader *This = impl_from_IXmlReader(iface);
2509 TRACE("(%p %p)\n", This, linePosition);
2511 if (!linePosition) return E_INVALIDARG;
2513 *linePosition = This->pos;
2515 return S_OK;
2518 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2520 xmlreader *This = impl_from_IXmlReader(iface);
2522 TRACE("(%p)->(%p)\n", This, count);
2524 if (!count) return E_INVALIDARG;
2526 *count = This->attr_count;
2527 return S_OK;
2530 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2532 xmlreader *This = impl_from_IXmlReader(iface);
2533 TRACE("(%p)->(%p)\n", This, depth);
2534 *depth = This->depth;
2535 return S_OK;
2538 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2540 FIXME("(%p): stub\n", iface);
2541 return E_NOTIMPL;
2544 static const struct IXmlReaderVtbl xmlreader_vtbl =
2546 xmlreader_QueryInterface,
2547 xmlreader_AddRef,
2548 xmlreader_Release,
2549 xmlreader_SetInput,
2550 xmlreader_GetProperty,
2551 xmlreader_SetProperty,
2552 xmlreader_Read,
2553 xmlreader_GetNodeType,
2554 xmlreader_MoveToFirstAttribute,
2555 xmlreader_MoveToNextAttribute,
2556 xmlreader_MoveToAttributeByName,
2557 xmlreader_MoveToElement,
2558 xmlreader_GetQualifiedName,
2559 xmlreader_GetNamespaceUri,
2560 xmlreader_GetLocalName,
2561 xmlreader_GetPrefix,
2562 xmlreader_GetValue,
2563 xmlreader_ReadValueChunk,
2564 xmlreader_GetBaseUri,
2565 xmlreader_IsDefault,
2566 xmlreader_IsEmptyElement,
2567 xmlreader_GetLineNumber,
2568 xmlreader_GetLinePosition,
2569 xmlreader_GetAttributeCount,
2570 xmlreader_GetDepth,
2571 xmlreader_IsEOF
2574 /** IXmlReaderInput **/
2575 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
2577 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2579 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2581 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
2582 IsEqualGUID(riid, &IID_IUnknown))
2584 *ppvObject = iface;
2586 else
2588 WARN("interface %s not implemented\n", debugstr_guid(riid));
2589 *ppvObject = NULL;
2590 return E_NOINTERFACE;
2593 IUnknown_AddRef(iface);
2595 return S_OK;
2598 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
2600 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2601 ULONG ref = InterlockedIncrement(&This->ref);
2602 TRACE("(%p)->(%d)\n", This, ref);
2603 return ref;
2606 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
2608 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
2609 LONG ref = InterlockedDecrement(&This->ref);
2611 TRACE("(%p)->(%d)\n", This, ref);
2613 if (ref == 0)
2615 IMalloc *imalloc = This->imalloc;
2616 if (This->input) IUnknown_Release(This->input);
2617 if (This->stream) ISequentialStream_Release(This->stream);
2618 if (This->buffer) free_input_buffer(This->buffer);
2619 readerinput_free(This, This->baseuri);
2620 readerinput_free(This, This);
2621 if (imalloc) IMalloc_Release(imalloc);
2624 return ref;
2627 static const struct IUnknownVtbl xmlreaderinputvtbl =
2629 xmlreaderinput_QueryInterface,
2630 xmlreaderinput_AddRef,
2631 xmlreaderinput_Release
2634 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
2636 xmlreader *reader;
2637 int i;
2639 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
2641 if (!IsEqualGUID(riid, &IID_IXmlReader))
2643 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
2644 return E_FAIL;
2647 if (imalloc)
2648 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
2649 else
2650 reader = heap_alloc(sizeof(*reader));
2651 if(!reader) return E_OUTOFMEMORY;
2653 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
2654 reader->ref = 1;
2655 reader->input = NULL;
2656 reader->state = XmlReadState_Closed;
2657 reader->instate = XmlReadInState_Initial;
2658 reader->resumestate = XmlReadResumeState_Initial;
2659 reader->dtdmode = DtdProcessing_Prohibit;
2660 reader->line = reader->pos = 0;
2661 reader->imalloc = imalloc;
2662 if (imalloc) IMalloc_AddRef(imalloc);
2663 reader->nodetype = XmlNodeType_None;
2664 list_init(&reader->attrs);
2665 reader->attr_count = 0;
2666 reader->attr = NULL;
2667 list_init(&reader->elements);
2668 reader->depth = 0;
2669 reader->max_depth = 256;
2670 reader->empty_element = FALSE;
2671 memset(reader->resume, 0, sizeof(reader->resume));
2673 for (i = 0; i < StringValue_Last; i++)
2674 reader->strvalues[i] = strval_empty;
2676 *obj = &reader->IXmlReader_iface;
2678 TRACE("returning iface %p\n", *obj);
2680 return S_OK;
2683 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
2684 IMalloc *imalloc,
2685 LPCWSTR encoding,
2686 BOOL hint,
2687 LPCWSTR base_uri,
2688 IXmlReaderInput **ppInput)
2690 xmlreaderinput *readerinput;
2691 HRESULT hr;
2693 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
2694 hint, wine_dbgstr_w(base_uri), ppInput);
2696 if (!stream || !ppInput) return E_INVALIDARG;
2698 if (imalloc)
2699 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
2700 else
2701 readerinput = heap_alloc(sizeof(*readerinput));
2702 if(!readerinput) return E_OUTOFMEMORY;
2704 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
2705 readerinput->ref = 1;
2706 readerinput->imalloc = imalloc;
2707 readerinput->stream = NULL;
2708 if (imalloc) IMalloc_AddRef(imalloc);
2709 readerinput->encoding = parse_encoding_name(encoding, -1);
2710 readerinput->hint = hint;
2711 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
2712 readerinput->pending = 0;
2714 hr = alloc_input_buffer(readerinput);
2715 if (hr != S_OK)
2717 readerinput_free(readerinput, readerinput->baseuri);
2718 readerinput_free(readerinput, readerinput);
2719 if (imalloc) IMalloc_Release(imalloc);
2720 return hr;
2722 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
2724 *ppInput = &readerinput->IXmlReaderInput_iface;
2726 TRACE("returning iface %p\n", *ppInput);
2728 return S_OK;