/* xmllite/reader: Enforce maximum element depth limit.
 * [wine.git] / dlls / xmllite / reader.c
 * blob a52e928be410e663bccfc83a0548bf830945f34f
 */
/*
 * IXmlReader implementation
 *
 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Position of the parser within the overall document structure. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* saved pointer index to resume from a particular input position */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
/* Index of a per-node string slot; StringValue_Last is the slot count. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static BOOL is_namestartchar(WCHAR ch);
98 static const char *debugstr_nodetype(XmlNodeType nodetype)
100 static const char * const type_names[] =
102 "None",
103 "Element",
104 "Attribute",
105 "Text",
106 "CDATA",
109 "ProcessingInstruction",
110 "Comment",
112 "DocumentType",
115 "Whitespace",
117 "EndElement",
119 "XmlDeclaration"
122 if (nodetype > _XmlNodeType_Last)
123 return wine_dbg_sprintf("unknown type=%d", nodetype);
125 return type_names[nodetype];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop)
130 static const char * const prop_names[] =
132 "MultiLanguage",
133 "ConformanceLevel",
134 "RandomAccess",
135 "XmlResolver",
136 "DtdProcessing",
137 "ReadState",
138 "MaxElementDepth",
139 "MaxEntityExpansion"
142 if (prop > _XmlReaderProperty_Last)
143 return wine_dbg_sprintf("unknown property=%d", prop);
145 return prop_names[prop];
148 struct xml_encoding_data
150 const WCHAR *name;
151 xml_encoding enc;
152 UINT cp;
155 static const struct xml_encoding_data xml_encoding_map[] = {
156 { utf16W, XmlEncoding_UTF16, ~0 },
157 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
160 const WCHAR *get_encoding_name(xml_encoding encoding)
162 return xml_encoding_map[encoding].name;
165 xml_encoding get_encoding_from_codepage(UINT codepage)
167 int i;
168 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
170 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
172 return XmlEncoding_Unknown;
175 typedef struct
177 char *data;
178 UINT cur;
179 unsigned int allocated;
180 unsigned int written;
181 } encoded_buffer;
183 typedef struct input_buffer input_buffer;
185 typedef struct
187 IXmlReaderInput IXmlReaderInput_iface;
188 LONG ref;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
190 IUnknown *input;
191 IMalloc *imalloc;
192 xml_encoding encoding;
193 BOOL hint;
194 WCHAR *baseuri;
195 /* stream reference set after SetInput() call from reader,
196 stored as sequential stream, cause currently
197 optimizations possible with IStream aren't implemented */
198 ISequentialStream *stream;
199 input_buffer *buffer;
200 unsigned int pending : 1;
201 } xmlreaderinput;
203 static const struct IUnknownVtbl xmlreaderinputvtbl;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
213 typedef struct
215 WCHAR *str; /* allocated null-terminated string */
216 UINT len; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start; /* input position where value starts */
218 } strval;
220 static WCHAR emptyW[] = {0};
221 static WCHAR xmlW[] = {'x','m','l',0};
222 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
223 static const strval strval_empty = { emptyW };
224 static const strval strval_xml = { xmlW, 3 };
225 static const strval strval_xmlns = { xmlnsW, 5 };
227 struct attribute
229 struct list entry;
230 strval prefix;
231 strval localname;
232 strval qname;
233 strval value;
236 struct element
238 struct list entry;
239 strval prefix;
240 strval localname;
241 strval qname;
244 struct ns
246 struct list entry;
247 strval prefix;
248 strval uri;
249 struct element *element;
252 typedef struct
254 IXmlReader IXmlReader_iface;
255 LONG ref;
256 xmlreaderinput *input;
257 IMalloc *imalloc;
258 XmlReadState state;
259 XmlReaderInternalState instate;
260 XmlReaderResumeState resumestate;
261 XmlNodeType nodetype;
262 DtdProcessing dtdmode;
263 IXmlResolver *resolver;
264 IUnknown *mlang;
265 UINT line, pos; /* reader position in XML stream */
266 struct list attrs; /* attributes list for current node */
267 struct attribute *attr; /* current attribute */
268 UINT attr_count;
269 struct list nsdef;
270 struct list ns;
271 struct list elements;
272 strval strvalues[StringValue_Last];
273 UINT depth;
274 UINT max_depth;
275 BOOL is_empty_element;
276 struct element empty_element;
277 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
278 } xmlreader;
280 struct input_buffer
282 encoded_buffer utf16;
283 encoded_buffer encoded;
284 UINT code_page;
285 xmlreaderinput *input;
288 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
290 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
293 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
295 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
298 /* reader memory allocation functions */
299 static inline void *reader_alloc(xmlreader *reader, size_t len)
301 return m_alloc(reader->imalloc, len);
304 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
306 void *ret = reader_alloc(reader, len);
307 if (ret)
308 memset(ret, 0, len);
309 return ret;
312 static inline void reader_free(xmlreader *reader, void *mem)
314 m_free(reader->imalloc, mem);
317 /* Just return pointer from offset, no attempt to read more. */
318 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
320 encoded_buffer *buffer = &reader->input->buffer->utf16;
321 return (WCHAR*)buffer->data + offset;
324 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
326 return v->str ? v->str : reader_get_ptr2(reader, v->start);
329 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
331 *dest = *src;
333 if (src->str != strval_empty.str)
335 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
336 if (!dest->str) return E_OUTOFMEMORY;
337 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
338 dest->str[dest->len] = 0;
339 dest->start = 0;
342 return S_OK;
345 /* reader input memory allocation functions */
346 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
348 return m_alloc(input->imalloc, len);
351 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
353 return m_realloc(input->imalloc, mem, len);
356 static inline void readerinput_free(xmlreaderinput *input, void *mem)
358 m_free(input->imalloc, mem);
361 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
363 LPWSTR ret = NULL;
365 if(str) {
366 DWORD size;
368 size = (strlenW(str)+1)*sizeof(WCHAR);
369 ret = readerinput_alloc(input, size);
370 if (ret) memcpy(ret, str, size);
373 return ret;
376 static void reader_clear_attrs(xmlreader *reader)
378 struct attribute *attr, *attr2;
379 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
381 reader_free(reader, attr);
383 list_init(&reader->attrs);
384 reader->attr_count = 0;
385 reader->attr = NULL;
388 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
389 while we are on a node with attributes */
390 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
391 strval *value)
393 struct attribute *attr;
395 attr = reader_alloc(reader, sizeof(*attr));
396 if (!attr) return E_OUTOFMEMORY;
398 if (prefix)
399 attr->prefix = *prefix;
400 else
401 memset(&attr->prefix, 0, sizeof(attr->prefix));
402 attr->localname = *localname;
403 attr->qname = qname ? *qname : *localname;
404 attr->value = *value;
405 list_add_tail(&reader->attrs, &attr->entry);
406 reader->attr_count++;
408 return S_OK;
411 /* This one frees stored string value if needed */
412 static void reader_free_strvalued(xmlreader *reader, strval *v)
414 if (v->str != strval_empty.str)
416 reader_free(reader, v->str);
417 *v = strval_empty;
421 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
423 v->start = start;
424 v->len = len;
425 v->str = NULL;
428 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
430 return debugstr_wn(reader_get_strptr(reader, v), v->len);
433 /* used to initialize from constant string */
434 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
436 v->start = 0;
437 v->len = len;
438 v->str = str;
441 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
443 reader_free_strvalued(reader, &reader->strvalues[type]);
446 static void reader_free_strvalues(xmlreader *reader)
448 int type;
449 for (type = 0; type < StringValue_Last; type++)
450 reader_free_strvalue(reader, type);
453 /* This helper should only be used to test if strings are the same,
454 it doesn't try to sort. */
455 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
457 if (str1->len != str2->len) return 0;
458 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
461 static void reader_clear_elements(xmlreader *reader)
463 struct element *elem, *elem2;
464 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
466 reader_free_strvalued(reader, &elem->prefix);
467 reader_free_strvalued(reader, &elem->localname);
468 reader_free_strvalued(reader, &elem->qname);
469 reader_free(reader, elem);
471 list_init(&reader->elements);
472 reader->is_empty_element = FALSE;
475 static HRESULT reader_inc_depth(xmlreader *reader)
477 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
480 static void reader_dec_depth(xmlreader *reader)
482 if (reader->depth)
483 reader->depth--;
486 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
488 struct ns *ns;
489 HRESULT hr;
491 ns = reader_alloc(reader, sizeof(*ns));
492 if (!ns) return E_OUTOFMEMORY;
494 if (def)
495 memset(&ns->prefix, 0, sizeof(ns->prefix));
496 else {
497 hr = reader_strvaldup(reader, prefix, &ns->prefix);
498 if (FAILED(hr)) {
499 reader_free(reader, ns);
500 return hr;
504 hr = reader_strvaldup(reader, uri, &ns->uri);
505 if (FAILED(hr)) {
506 reader_free_strvalued(reader, &ns->prefix);
507 reader_free(reader, ns);
508 return hr;
511 ns->element = NULL;
512 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
513 return hr;
516 static void reader_free_element(xmlreader *reader, struct element *element)
518 reader_free_strvalued(reader, &element->prefix);
519 reader_free_strvalued(reader, &element->localname);
520 reader_free_strvalued(reader, &element->qname);
521 reader_free(reader, element);
524 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
526 struct ns *ns;
528 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
529 if (ns->element)
530 break;
531 ns->element = element;
534 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
535 if (ns->element)
536 break;
537 ns->element = element;
541 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
542 strval *qname)
544 struct element *element;
545 HRESULT hr;
547 element = reader_alloc_zero(reader, sizeof(*element));
548 if (!element)
549 return E_OUTOFMEMORY;
551 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
552 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
553 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
555 list_add_head(&reader->elements, &element->entry);
556 reader_mark_ns_nodes(reader, element);
557 reader->is_empty_element = FALSE;
559 else
560 reader_free_element(reader, element);
562 return hr;
565 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
567 struct ns *ns, *ns2;
569 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
570 if (ns->element != element)
571 break;
573 list_remove(&ns->entry);
574 reader_free_strvalued(reader, &ns->prefix);
575 reader_free_strvalued(reader, &ns->uri);
576 reader_free(reader, ns);
579 if (!list_empty(&reader->nsdef)) {
580 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
581 if (ns->element == element) {
582 list_remove(&ns->entry);
583 reader_free_strvalued(reader, &ns->prefix);
584 reader_free_strvalued(reader, &ns->uri);
585 reader_free(reader, ns);
590 static void reader_pop_element(xmlreader *reader)
592 struct element *element;
594 if (list_empty(&reader->elements))
595 return;
597 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
598 list_remove(&element->entry);
600 reader_pop_ns_nodes(reader, element);
601 reader_free_element(reader, element);
603 /* It was a root element, the rest is expected as Misc */
604 if (list_empty(&reader->elements))
605 reader->instate = XmlReadInState_MiscEnd;
608 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
609 means node value is to be determined. */
610 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
612 strval *v = &reader->strvalues[type];
614 reader_free_strvalue(reader, type);
615 if (!value)
617 v->str = NULL;
618 v->start = 0;
619 v->len = 0;
620 return;
623 if (value->str == strval_empty.str)
624 *v = *value;
625 else
627 if (type == StringValue_Value)
629 /* defer allocation for value string */
630 v->str = NULL;
631 v->start = value->start;
632 v->len = value->len;
634 else
636 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
637 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
638 v->str[value->len] = 0;
639 v->len = value->len;
644 static inline int is_reader_pending(xmlreader *reader)
646 return reader->input->pending;
649 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
651 const int initial_len = 0x2000;
652 buffer->data = readerinput_alloc(input, initial_len);
653 if (!buffer->data) return E_OUTOFMEMORY;
655 memset(buffer->data, 0, 4);
656 buffer->cur = 0;
657 buffer->allocated = initial_len;
658 buffer->written = 0;
660 return S_OK;
663 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
665 readerinput_free(input, buffer->data);
668 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
670 if (encoding == XmlEncoding_Unknown)
672 FIXME("unsupported encoding %d\n", encoding);
673 return E_NOTIMPL;
676 *cp = xml_encoding_map[encoding].cp;
678 return S_OK;
681 xml_encoding parse_encoding_name(const WCHAR *name, int len)
683 int min, max, n, c;
685 if (!name) return XmlEncoding_Unknown;
687 min = 0;
688 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
690 while (min <= max)
692 n = (min+max)/2;
694 if (len != -1)
695 c = strncmpiW(xml_encoding_map[n].name, name, len);
696 else
697 c = strcmpiW(xml_encoding_map[n].name, name);
698 if (!c)
699 return xml_encoding_map[n].enc;
701 if (c > 0)
702 max = n-1;
703 else
704 min = n+1;
707 return XmlEncoding_Unknown;
710 static HRESULT alloc_input_buffer(xmlreaderinput *input)
712 input_buffer *buffer;
713 HRESULT hr;
715 input->buffer = NULL;
717 buffer = readerinput_alloc(input, sizeof(*buffer));
718 if (!buffer) return E_OUTOFMEMORY;
720 buffer->input = input;
721 buffer->code_page = ~0; /* code page is unknown at this point */
722 hr = init_encoded_buffer(input, &buffer->utf16);
723 if (hr != S_OK) {
724 readerinput_free(input, buffer);
725 return hr;
728 hr = init_encoded_buffer(input, &buffer->encoded);
729 if (hr != S_OK) {
730 free_encoded_buffer(input, &buffer->utf16);
731 readerinput_free(input, buffer);
732 return hr;
735 input->buffer = buffer;
736 return S_OK;
739 static void free_input_buffer(input_buffer *buffer)
741 free_encoded_buffer(buffer->input, &buffer->encoded);
742 free_encoded_buffer(buffer->input, &buffer->utf16);
743 readerinput_free(buffer->input, buffer);
746 static void readerinput_release_stream(xmlreaderinput *readerinput)
748 if (readerinput->stream) {
749 ISequentialStream_Release(readerinput->stream);
750 readerinput->stream = NULL;
754 /* Queries already stored interface for IStream/ISequentialStream.
755 Interface supplied on creation will be overwritten */
756 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
758 HRESULT hr;
760 readerinput_release_stream(readerinput);
761 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
762 if (hr != S_OK)
763 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
765 return hr;
768 /* reads a chunk to raw buffer */
769 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
771 encoded_buffer *buffer = &readerinput->buffer->encoded;
772 /* to make sure aligned length won't exceed allocated length */
773 ULONG len = buffer->allocated - buffer->written - 4;
774 ULONG read;
775 HRESULT hr;
777 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
778 variable width encodings like UTF-8 */
779 len = (len + 3) & ~3;
780 /* try to use allocated space or grow */
781 if (buffer->allocated - buffer->written < len)
783 buffer->allocated *= 2;
784 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
785 len = buffer->allocated - buffer->written;
788 read = 0;
789 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
790 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
791 readerinput->pending = hr == E_PENDING;
792 if (FAILED(hr)) return hr;
793 buffer->written += read;
795 return hr;
798 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
799 static void readerinput_grow(xmlreaderinput *readerinput, int length)
801 encoded_buffer *buffer = &readerinput->buffer->utf16;
803 length *= sizeof(WCHAR);
804 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
805 if (buffer->allocated < buffer->written + length + 4)
807 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
808 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
809 buffer->allocated = grown_size;
813 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
815 static const char startA[] = {'<','?'};
816 static const char commentA[] = {'<','!'};
817 encoded_buffer *buffer = &readerinput->buffer->encoded;
818 unsigned char *ptr = (unsigned char*)buffer->data;
820 return !memcmp(buffer->data, startA, sizeof(startA)) ||
821 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
822 /* test start byte */
823 (ptr[0] == '<' &&
825 (ptr[1] && (ptr[1] <= 0x7f)) ||
826 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
827 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
828 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
832 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
834 encoded_buffer *buffer = &readerinput->buffer->encoded;
835 static const char utf8bom[] = {0xef,0xbb,0xbf};
836 static const char utf16lebom[] = {0xff,0xfe};
837 WCHAR *ptrW;
839 *enc = XmlEncoding_Unknown;
841 if (buffer->written <= 3)
843 HRESULT hr = readerinput_growraw(readerinput);
844 if (FAILED(hr)) return hr;
845 if (buffer->written <= 3) return MX_E_INPUTEND;
848 ptrW = (WCHAR *)buffer->data;
849 /* try start symbols if we have enough data to do that, input buffer should contain
850 first chunk already */
851 if (readerinput_is_utf8(readerinput))
852 *enc = XmlEncoding_UTF8;
853 else if (*ptrW == '<')
855 ptrW++;
856 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
857 *enc = XmlEncoding_UTF16;
859 /* try with BOM now */
860 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
862 buffer->cur += sizeof(utf8bom);
863 *enc = XmlEncoding_UTF8;
865 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
867 buffer->cur += sizeof(utf16lebom);
868 *enc = XmlEncoding_UTF16;
871 return S_OK;
874 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
876 encoded_buffer *buffer = &readerinput->buffer->encoded;
877 int len = buffer->written;
879 /* complete single byte char */
880 if (!(buffer->data[len-1] & 0x80)) return len;
882 /* find start byte of multibyte char */
883 while (--len && !(buffer->data[len] & 0xc0))
886 return len;
889 /* Returns byte length of complete char sequence for buffer code page,
890 it's relative to current buffer position which is currently used for BOM handling
891 only. */
892 static int readerinput_get_convlen(xmlreaderinput *readerinput)
894 encoded_buffer *buffer = &readerinput->buffer->encoded;
895 int len;
897 if (readerinput->buffer->code_page == CP_UTF8)
898 len = readerinput_get_utf8_convlen(readerinput);
899 else
900 len = buffer->written;
902 TRACE("%d\n", len - buffer->cur);
903 return len - buffer->cur;
906 /* It's possible that raw buffer has some leftovers from last conversion - some char
907 sequence that doesn't represent a full code point. Length argument should be calculated with
908 readerinput_get_convlen(), if it's -1 it will be calculated here. */
909 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
911 encoded_buffer *buffer = &readerinput->buffer->encoded;
913 if (len == -1)
914 len = readerinput_get_convlen(readerinput);
916 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
917 /* everything below cur is lost too */
918 buffer->written -= len + buffer->cur;
919 /* after this point we don't need cur offset really,
920 it's used only to mark where actual data begins when first chunk is read */
921 buffer->cur = 0;
924 /* note that raw buffer content is kept */
925 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
927 encoded_buffer *src = &readerinput->buffer->encoded;
928 encoded_buffer *dest = &readerinput->buffer->utf16;
929 int len, dest_len;
930 HRESULT hr;
931 WCHAR *ptr;
932 UINT cp;
934 hr = get_code_page(enc, &cp);
935 if (FAILED(hr)) return;
937 readerinput->buffer->code_page = cp;
938 len = readerinput_get_convlen(readerinput);
940 TRACE("switching to cp %d\n", cp);
942 /* just copy in this case */
943 if (enc == XmlEncoding_UTF16)
945 readerinput_grow(readerinput, len);
946 memcpy(dest->data, src->data + src->cur, len);
947 dest->written += len*sizeof(WCHAR);
948 return;
951 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
952 readerinput_grow(readerinput, dest_len);
953 ptr = (WCHAR*)dest->data;
954 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
955 ptr[dest_len] = 0;
956 dest->written += dest_len*sizeof(WCHAR);
959 /* shrinks parsed data a buffer begins with */
960 static void reader_shrink(xmlreader *reader)
962 encoded_buffer *buffer = &reader->input->buffer->utf16;
964 /* avoid to move too often using threshold shrink length */
965 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
967 buffer->written -= buffer->cur*sizeof(WCHAR);
968 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
969 buffer->cur = 0;
970 *(WCHAR*)&buffer->data[buffer->written] = 0;
974 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
975 It won't attempt to shrink but will grow destination buffer if needed */
976 static HRESULT reader_more(xmlreader *reader)
978 xmlreaderinput *readerinput = reader->input;
979 encoded_buffer *src = &readerinput->buffer->encoded;
980 encoded_buffer *dest = &readerinput->buffer->utf16;
981 UINT cp = readerinput->buffer->code_page;
982 int len, dest_len;
983 HRESULT hr;
984 WCHAR *ptr;
986 /* get some raw data from stream first */
987 hr = readerinput_growraw(readerinput);
988 len = readerinput_get_convlen(readerinput);
990 /* just copy for UTF-16 case */
991 if (cp == ~0)
993 readerinput_grow(readerinput, len);
994 memcpy(dest->data + dest->written, src->data + src->cur, len);
995 dest->written += len*sizeof(WCHAR);
996 return hr;
999 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1000 readerinput_grow(readerinput, dest_len);
1001 ptr = (WCHAR*)(dest->data + dest->written);
1002 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1003 ptr[dest_len] = 0;
1004 dest->written += dest_len*sizeof(WCHAR);
1005 /* get rid of processed data */
1006 readerinput_shrinkraw(readerinput, len);
1008 return hr;
1011 static inline UINT reader_get_cur(xmlreader *reader)
1013 return reader->input->buffer->utf16.cur;
1016 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1018 encoded_buffer *buffer = &reader->input->buffer->utf16;
1019 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1020 if (!*ptr) reader_more(reader);
1021 return (WCHAR*)buffer->data + buffer->cur;
1024 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1026 int i=0;
1027 const WCHAR *ptr = reader_get_ptr(reader);
1028 while (str[i])
1030 if (!ptr[i])
1032 reader_more(reader);
1033 ptr = reader_get_ptr(reader);
1035 if (str[i] != ptr[i])
1036 return ptr[i] - str[i];
1037 i++;
1039 return 0;
1042 /* moves cursor n WCHARs forward */
1043 static void reader_skipn(xmlreader *reader, int n)
1045 encoded_buffer *buffer = &reader->input->buffer->utf16;
1046 const WCHAR *ptr = reader_get_ptr(reader);
1048 while (*ptr++ && n--)
1050 buffer->cur++;
1051 reader->pos++;
1055 static inline BOOL is_wchar_space(WCHAR ch)
1057 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1060 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1061 static int reader_skipspaces(xmlreader *reader)
1063 encoded_buffer *buffer = &reader->input->buffer->utf16;
1064 const WCHAR *ptr = reader_get_ptr(reader);
1065 UINT start = reader_get_cur(reader);
1067 while (is_wchar_space(*ptr))
1069 if (*ptr == '\r')
1070 reader->pos = 0;
1071 else if (*ptr == '\n')
1073 reader->line++;
1074 reader->pos = 0;
1076 else
1077 reader->pos++;
1079 buffer->cur++;
1080 ptr = reader_get_ptr(reader);
1083 return reader_get_cur(reader) - start;
1086 /* [26] VersionNum ::= '1.' [0-9]+ */
1087 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1089 static const WCHAR onedotW[] = {'1','.',0};
1090 WCHAR *ptr, *ptr2;
1091 UINT start;
1093 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1095 start = reader_get_cur(reader);
1096 /* skip "1." */
1097 reader_skipn(reader, 2);
1099 ptr2 = ptr = reader_get_ptr(reader);
1100 while (*ptr >= '0' && *ptr <= '9')
1102 reader_skipn(reader, 1);
1103 ptr = reader_get_ptr(reader);
1106 if (ptr2 == ptr) return WC_E_DIGIT;
1107 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1108 TRACE("version=%s\n", debug_strval(reader, val));
1109 return S_OK;
1112 /* [25] Eq ::= S? '=' S? */
1113 static HRESULT reader_parse_eq(xmlreader *reader)
1115 static const WCHAR eqW[] = {'=',0};
1116 reader_skipspaces(reader);
1117 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1118 /* skip '=' */
1119 reader_skipn(reader, 1);
1120 reader_skipspaces(reader);
1121 return S_OK;
1124 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1125 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1127 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1128 strval val, name;
1129 HRESULT hr;
1131 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1133 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1134 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1135 /* skip 'version' */
1136 reader_skipn(reader, 7);
1138 hr = reader_parse_eq(reader);
1139 if (FAILED(hr)) return hr;
1141 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1142 return WC_E_QUOTE;
1143 /* skip "'"|'"' */
1144 reader_skipn(reader, 1);
1146 hr = reader_parse_versionnum(reader, &val);
1147 if (FAILED(hr)) return hr;
1149 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1150 return WC_E_QUOTE;
1152 /* skip "'"|'"' */
1153 reader_skipn(reader, 1);
1155 return reader_add_attr(reader, NULL, &name, NULL, &val);
1158 /* ([A-Za-z0-9._] | '-') */
1159 static inline BOOL is_wchar_encname(WCHAR ch)
1161 return ((ch >= 'A' && ch <= 'Z') ||
1162 (ch >= 'a' && ch <= 'z') ||
1163 (ch >= '0' && ch <= '9') ||
1164 (ch == '.') || (ch == '_') ||
1165 (ch == '-'));
1168 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1169 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1171 WCHAR *start = reader_get_ptr(reader), *ptr;
1172 xml_encoding enc;
1173 int len;
1175 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1176 return WC_E_ENCNAME;
1178 val->start = reader_get_cur(reader);
1180 ptr = start;
1181 while (is_wchar_encname(*++ptr))
1184 len = ptr - start;
1185 enc = parse_encoding_name(start, len);
1186 TRACE("encoding name %s\n", debugstr_wn(start, len));
1187 val->str = start;
1188 val->len = len;
1190 if (enc == XmlEncoding_Unknown)
1191 return WC_E_ENCNAME;
1193 /* skip encoding name */
1194 reader_skipn(reader, len);
1195 return S_OK;
1198 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1199 static HRESULT reader_parse_encdecl(xmlreader *reader)
1201 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1202 strval name, val;
1203 HRESULT hr;
1205 if (!reader_skipspaces(reader)) return S_FALSE;
1207 if (reader_cmp(reader, encodingW)) return S_FALSE;
1208 name.str = reader_get_ptr(reader);
1209 name.start = reader_get_cur(reader);
1210 name.len = 8;
1211 /* skip 'encoding' */
1212 reader_skipn(reader, 8);
1214 hr = reader_parse_eq(reader);
1215 if (FAILED(hr)) return hr;
1217 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1218 return WC_E_QUOTE;
1219 /* skip "'"|'"' */
1220 reader_skipn(reader, 1);
1222 hr = reader_parse_encname(reader, &val);
1223 if (FAILED(hr)) return hr;
1225 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1226 return WC_E_QUOTE;
1228 /* skip "'"|'"' */
1229 reader_skipn(reader, 1);
1231 return reader_add_attr(reader, NULL, &name, NULL, &val);
1234 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1235 static HRESULT reader_parse_sddecl(xmlreader *reader)
1237 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1238 static const WCHAR yesW[] = {'y','e','s',0};
1239 static const WCHAR noW[] = {'n','o',0};
1240 strval name, val;
1241 UINT start;
1242 HRESULT hr;
1244 if (!reader_skipspaces(reader)) return S_FALSE;
1246 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1247 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1248 /* skip 'standalone' */
1249 reader_skipn(reader, 10);
1251 hr = reader_parse_eq(reader);
1252 if (FAILED(hr)) return hr;
1254 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1255 return WC_E_QUOTE;
1256 /* skip "'"|'"' */
1257 reader_skipn(reader, 1);
1259 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1260 return WC_E_XMLDECL;
1262 start = reader_get_cur(reader);
1263 /* skip 'yes'|'no' */
1264 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1265 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1266 TRACE("standalone=%s\n", debug_strval(reader, &val));
1268 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1269 return WC_E_QUOTE;
1270 /* skip "'"|'"' */
1271 reader_skipn(reader, 1);
1273 return reader_add_attr(reader, NULL, &name, NULL, &val);
1276 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1277 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1279 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1280 static const WCHAR declcloseW[] = {'?','>',0};
1281 HRESULT hr;
1283 /* check if we have "<?xml " */
1284 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1286 reader_skipn(reader, 5);
1287 hr = reader_parse_versioninfo(reader);
1288 if (FAILED(hr))
1289 return hr;
1291 hr = reader_parse_encdecl(reader);
1292 if (FAILED(hr))
1293 return hr;
1295 hr = reader_parse_sddecl(reader);
1296 if (FAILED(hr))
1297 return hr;
1299 reader_skipspaces(reader);
1300 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1301 reader_skipn(reader, 2);
1303 reader->nodetype = XmlNodeType_XmlDeclaration;
1304 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1305 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1306 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1308 return S_OK;
1311 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1312 static HRESULT reader_parse_comment(xmlreader *reader)
1314 WCHAR *ptr;
1315 UINT start;
1317 if (reader->resumestate == XmlReadResumeState_Comment)
1319 start = reader->resume[XmlReadResume_Body];
1320 ptr = reader_get_ptr(reader);
1322 else
1324 /* skip '<!--' */
1325 reader_skipn(reader, 4);
1326 reader_shrink(reader);
1327 ptr = reader_get_ptr(reader);
1328 start = reader_get_cur(reader);
1329 reader->nodetype = XmlNodeType_Comment;
1330 reader->resume[XmlReadResume_Body] = start;
1331 reader->resumestate = XmlReadResumeState_Comment;
1332 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1333 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1334 reader_set_strvalue(reader, StringValue_Value, NULL);
1337 /* will exit when there's no more data, it won't attempt to
1338 read more from stream */
1339 while (*ptr)
1341 if (ptr[0] == '-')
1343 if (ptr[1] == '-')
1345 if (ptr[2] == '>')
1347 strval value;
1349 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1350 TRACE("%s\n", debug_strval(reader, &value));
1352 /* skip rest of markup '->' */
1353 reader_skipn(reader, 3);
1355 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1356 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1357 reader_set_strvalue(reader, StringValue_Value, &value);
1358 reader->resume[XmlReadResume_Body] = 0;
1359 reader->resumestate = XmlReadResumeState_Initial;
1360 return S_OK;
1362 else
1363 return WC_E_COMMENT;
1367 reader_skipn(reader, 1);
1368 ptr++;
1371 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
   Works on single UTF-16 code units; both surrogate halves are accepted so
   that characters above #xFFFF are not rejected. */
static inline BOOL is_char(WCHAR ch)
{
    const BOOL control   = ch == '\t' || ch == '\r' || ch == '\n';
    const BOOL low_bmp   = ch >= 0x20 && ch <= 0xd7ff;
    const BOOL high_surr = ch >= 0xd800 && ch <= 0xdbff;
    const BOOL low_surr  = ch >= 0xdc00 && ch <= 0xdfff;
    const BOOL high_bmp  = ch >= 0xe000 && ch <= 0xfffd;

    return control || low_bmp || high_surr || low_surr || high_bmp;
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
   Returns nonzero when ch is allowed inside a public identifier literal. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* the range has to start at the apostrophe (0x27), starting at '-'
              would reject ' ( ) * + , which the production allows */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./0-9:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* Tells whether ch can open a Name (NameStartChar production).  Works on
   single UTF-16 code units; both surrogate halves are accepted. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    const BOOL ascii = ch == ':' || ch == '_' ||
                       (ch >= 'A' && ch <= 'Z') ||
                       (ch >= 'a' && ch <= 'z');
    const BOOL below_0x300 = (ch >= 0xc0 && ch <= 0xd6) ||
                             (ch >= 0xd8 && ch <= 0xf6) ||
                             (ch >= 0xf8 && ch <= 0x2ff);
    const BOOL below_0x3000 = (ch >= 0x370 && ch <= 0x37d) ||
                              (ch >= 0x37f && ch <= 0x1fff) ||
                              (ch >= 0x200c && ch <= 0x200d) ||
                              (ch >= 0x2070 && ch <= 0x218f) ||
                              (ch >= 0x2c00 && ch <= 0x2fef);
    const BOOL surrogate = (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
                           (ch >= 0xdc00 && ch <= 0xdfff);   /* low surrogate */
    const BOOL rest = (ch >= 0x3001 && ch <= 0xd7ff) ||
                      (ch >= 0xf900 && ch <= 0xfdcf) ||
                      (ch >= 0xfdf0 && ch <= 0xfffd);

    return ascii || below_0x300 || below_0x3000 || surrogate || rest;
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*)
   Tells whether ch may appear in a non-colonized name: every name character
   except ':'.  Works on single UTF-16 code units; both surrogate halves are
   accepted. */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    const BOOL ascii = (ch >= 'A' && ch <= 'Z') ||
                       (ch >= 'a' && ch <= 'z') ||
                       (ch >= '0' && ch <= '9') ||
                       ch == '_' || ch == '-' || ch == '.';
    const BOOL below_0x370 = ch == 0xb7 ||
                             (ch >= 0xc0 && ch <= 0xd6) ||
                             (ch >= 0xd8 && ch <= 0xf6) ||
                             (ch >= 0xf8 && ch <= 0x2ff) ||
                             (ch >= 0x300 && ch <= 0x36f);
    const BOOL below_0x3000 = (ch >= 0x370 && ch <= 0x37d) ||
                              (ch >= 0x37f && ch <= 0x1fff) ||
                              (ch >= 0x200c && ch <= 0x200d) ||
                              (ch >= 0x203f && ch <= 0x2040) ||
                              (ch >= 0x2070 && ch <= 0x218f) ||
                              (ch >= 0x2c00 && ch <= 0x2fef);
    const BOOL surrogate = (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
                           (ch >= 0xdc00 && ch <= 0xdfff);   /* low surrogate */
    const BOOL rest = (ch >= 0x3001 && ch <= 0xd7ff) ||
                      (ch >= 0xf900 && ch <= 0xfdcf) ||
                      (ch >= 0xfdf0 && ch <= 0xfffd);

    return ascii || below_0x370 || below_0x3000 || surrogate || rest;
}
1442 static inline BOOL is_namechar(WCHAR ch)
1444 return (ch == ':') || is_ncnamechar(ch);
1447 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1449 /* When we're on attribute always return attribute type, container node type is kept.
1450 Note that container is not necessarily an element, and attribute doesn't mean it's
1451 an attribute in XML spec terms. */
1452 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1455 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1456 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1457 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1458 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1459 [5] Name ::= NameStartChar (NameChar)* */
1460 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1462 WCHAR *ptr;
1463 UINT start;
1465 if (reader->resume[XmlReadResume_Name])
1467 start = reader->resume[XmlReadResume_Name];
1468 ptr = reader_get_ptr(reader);
1470 else
1472 ptr = reader_get_ptr(reader);
1473 start = reader_get_cur(reader);
1474 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1477 while (is_namechar(*ptr))
1479 reader_skipn(reader, 1);
1480 ptr = reader_get_ptr(reader);
1483 if (is_reader_pending(reader))
1485 reader->resume[XmlReadResume_Name] = start;
1486 return E_PENDING;
1488 else
1489 reader->resume[XmlReadResume_Name] = 0;
1491 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1492 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1494 return S_OK;
1497 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1498 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1500 static const WCHAR xmlW[] = {'x','m','l'};
1501 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1502 strval name;
1503 WCHAR *ptr;
1504 HRESULT hr;
1505 UINT i;
1507 hr = reader_parse_name(reader, &name);
1508 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1510 /* now that we got name check for illegal content */
1511 if (strval_eq(reader, &name, &xmlval))
1512 return WC_E_LEADINGXML;
1514 /* PITarget can't be a qualified name */
1515 ptr = reader_get_strptr(reader, &name);
1516 for (i = 0; i < name.len; i++)
1517 if (ptr[i] == ':')
1518 return i ? NC_E_NAMECOLON : WC_E_PI;
1520 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1521 *target = name;
1522 return S_OK;
/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'

   Resumable, reader->resumestate selects the entry point:
     Initial  - skip "<?", then continue with the target;
     PITarget - parse the target, publish it as local and qualified name and
                record the current offset as body start;
     other    - go straight to scanning the body.

   Returns a failure code from reader_parse_pitarget(), or S_OK.  S_OK is
   returned both when "?>" was found (node type, value and resume state are
   then updated) and when buffered data ran out before it; in the latter case
   the resume state is left as it is. */
static HRESULT reader_parse_pi(xmlreader *reader)
{
    strval target;
    WCHAR *ptr;
    UINT start;
    HRESULT hr;

    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        /* skip '<?' */
        reader_skipn(reader, 2);
        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_PITarget;
        /* fallthrough */
    case XmlReadResumeState_PITarget:
        hr = reader_parse_pitarget(reader, &target);
        if (FAILED(hr)) return hr;
        reader_set_strvalue(reader, StringValue_LocalName, &target);
        reader_set_strvalue(reader, StringValue_QualifiedName, &target);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        reader->resumestate = XmlReadResumeState_PIBody;
        reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
        /* fallthrough */
    default:
        ;
    }

    /* body scan, start is the offset saved when the target was parsed */
    start = reader->resume[XmlReadResume_Body];
    ptr = reader_get_ptr(reader);
    while (*ptr)
    {
        if (ptr[0] == '?')
        {
            if (ptr[1] == '>')
            {
                UINT cur = reader_get_cur(reader);
                strval value;

                /* strip all leading whitespace chars */
                while (start < cur)
                {
                    ptr = reader_get_ptr2(reader, start);
                    if (!is_wchar_space(*ptr)) break;
                    start++;
                }

                /* value is everything between the stripped start and '?>' */
                reader_init_strvalue(start, cur-start, &value);

                /* skip '?>' */
                reader_skipn(reader, 2);
                TRACE("%s\n", debug_strval(reader, &value));
                reader->nodetype = XmlNodeType_ProcessingInstruction;
                reader->resumestate = XmlReadResumeState_Initial;
                reader->resume[XmlReadResume_Body] = 0;
                reader_set_strvalue(reader, StringValue_Value, &value);
                return S_OK;
            }
        }

        reader_skipn(reader, 1);
        ptr = reader_get_ptr(reader);
    }

    /* ran out of buffered data before '?>' */
    return S_OK;
}
/* This one is used to parse significant whitespace nodes, like in Misc production.

   Resumable.  In the Initial state the node is set up as an empty
   XmlNodeType_Whitespace node and the current offset is saved as its start;
   the Whitespace state continues a run started earlier.  Always returns
   S_OK; when the reader is pending after skipping, the value is not set yet
   and the resume state stays at Whitespace. */
static HRESULT reader_parse_whitespace(xmlreader *reader)
{
    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_Whitespace;
        reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
        reader->nodetype = XmlNodeType_Whitespace;
        reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
        reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        /* fallthrough */
    case XmlReadResumeState_Whitespace:
    {
        strval value;
        UINT start;

        reader_skipspaces(reader);
        /* more whitespace may follow, keep the resume state for the next call */
        if (is_reader_pending(reader)) return S_OK;

        /* the run is complete, publish it from the saved start offset */
        start = reader->resume[XmlReadResume_Body];
        reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
        reader_set_strvalue(reader, StringValue_Value, &value);
        TRACE("%s\n", debug_strval(reader, &value));
        reader->resumestate = XmlReadResumeState_Initial;
    }
    default:
        ;
    }

    return S_OK;
}
/* [27] Misc ::= Comment | PI | S

   When a node was interrupted earlier (resume state is not Initial) more
   input is requested and that node's parser is called again; its result is
   returned directly.  Otherwise the input is dispatched to the whitespace,
   comment or PI parser depending on what it starts with.

   The result of a sub-parser is returned unless it is S_FALSE, in which case
   dispatching is repeated.  When the input starts with none of the three the
   loop ends and the current hr is returned: S_FALSE on the normal path. */
static HRESULT reader_parse_misc(xmlreader *reader)
{
    HRESULT hr = S_FALSE;

    if (reader->resumestate != XmlReadResumeState_Initial)
    {
        hr = reader_more(reader);
        if (FAILED(hr)) return hr;

        /* finish current node */
        switch (reader->resumestate)
        {
        case XmlReadResumeState_PITarget:
        case XmlReadResumeState_PIBody:
            return reader_parse_pi(reader);
        case XmlReadResumeState_Comment:
            return reader_parse_comment(reader);
        case XmlReadResumeState_Whitespace:
            return reader_parse_whitespace(reader);
        default:
            /* not a Misc resume state; execution continues with the loop
               below and hr still holds the reader_more() result */
            ERR("unknown resume state %d\n", reader->resumestate);
        }
    }

    while (1)
    {
        const WCHAR *cur = reader_get_ptr(reader);

        if (is_wchar_space(*cur))
            hr = reader_parse_whitespace(reader);
        else if (!reader_cmp(reader, commentW))
            hr = reader_parse_comment(reader);
        else if (!reader_cmp(reader, piW))
            hr = reader_parse_pi(reader);
        else
            break;

        /* anything but S_FALSE, success or failure, ends Misc parsing */
        if (hr != S_FALSE) return hr;
    }

    return hr;
}
1670 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1671 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1673 WCHAR *cur = reader_get_ptr(reader), quote;
1674 UINT start;
1676 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1678 quote = *cur;
1679 reader_skipn(reader, 1);
1681 cur = reader_get_ptr(reader);
1682 start = reader_get_cur(reader);
1683 while (is_char(*cur) && *cur != quote)
1685 reader_skipn(reader, 1);
1686 cur = reader_get_ptr(reader);
1688 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1689 if (*cur == quote) reader_skipn(reader, 1);
1691 TRACE("%s\n", debug_strval(reader, literal));
1692 return S_OK;
1695 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1696 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1697 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1699 WCHAR *cur = reader_get_ptr(reader), quote;
1700 UINT start;
1702 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1704 quote = *cur;
1705 reader_skipn(reader, 1);
1707 start = reader_get_cur(reader);
1708 cur = reader_get_ptr(reader);
1709 while (is_pubchar(*cur) && *cur != quote)
1711 reader_skipn(reader, 1);
1712 cur = reader_get_ptr(reader);
1714 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1715 if (*cur == quote) reader_skipn(reader, 1);
1717 TRACE("%s\n", debug_strval(reader, literal));
1718 return S_OK;
1721 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1722 static HRESULT reader_parse_externalid(xmlreader *reader)
1724 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1725 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1726 strval name, sys;
1727 HRESULT hr;
1728 int cnt;
1730 if (!reader_cmp(reader, publicW)) {
1731 strval pub;
1733 /* public id */
1734 reader_skipn(reader, 6);
1735 cnt = reader_skipspaces(reader);
1736 if (!cnt) return WC_E_WHITESPACE;
1738 hr = reader_parse_pub_literal(reader, &pub);
1739 if (FAILED(hr)) return hr;
1741 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1742 hr = reader_add_attr(reader, NULL, &name, NULL, &pub);
1743 if (FAILED(hr)) return hr;
1745 cnt = reader_skipspaces(reader);
1746 if (!cnt) return S_OK;
1748 /* optional system id */
1749 hr = reader_parse_sys_literal(reader, &sys);
1750 if (FAILED(hr)) return S_OK;
1752 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1753 hr = reader_add_attr(reader, NULL, &name, NULL, &sys);
1754 if (FAILED(hr)) return hr;
1756 return S_OK;
1757 } else if (!reader_cmp(reader, systemW)) {
1758 /* system id */
1759 reader_skipn(reader, 6);
1760 cnt = reader_skipspaces(reader);
1761 if (!cnt) return WC_E_WHITESPACE;
1763 hr = reader_parse_sys_literal(reader, &sys);
1764 if (FAILED(hr)) return hr;
1766 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1767 return reader_add_attr(reader, NULL, &name, NULL, &sys);
1770 return S_FALSE;
1773 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1774 static HRESULT reader_parse_dtd(xmlreader *reader)
1776 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1777 strval name;
1778 WCHAR *cur;
1779 HRESULT hr;
1781 /* check if we have "<!DOCTYPE" */
1782 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1783 reader_shrink(reader);
1785 /* DTD processing is not allowed by default */
1786 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1788 reader_skipn(reader, 9);
1789 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1791 /* name */
1792 hr = reader_parse_name(reader, &name);
1793 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1795 reader_skipspaces(reader);
1797 hr = reader_parse_externalid(reader);
1798 if (FAILED(hr)) return hr;
1800 reader_skipspaces(reader);
1802 cur = reader_get_ptr(reader);
1803 if (*cur != '>')
1805 FIXME("internal subset parsing not implemented\n");
1806 return E_NOTIMPL;
1809 /* skip '>' */
1810 reader_skipn(reader, 1);
1812 reader->nodetype = XmlNodeType_DocumentType;
1813 reader_set_strvalue(reader, StringValue_LocalName, &name);
1814 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1816 return S_OK;
1819 /* [11 NS] LocalPart ::= NCName */
1820 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1822 WCHAR *ptr;
1823 UINT start;
1825 if (reader->resume[XmlReadResume_Local])
1827 start = reader->resume[XmlReadResume_Local];
1828 ptr = reader_get_ptr(reader);
1830 else
1832 ptr = reader_get_ptr(reader);
1833 start = reader_get_cur(reader);
1836 while (is_ncnamechar(*ptr))
1838 reader_skipn(reader, 1);
1839 ptr = reader_get_ptr(reader);
1842 if (is_reader_pending(reader))
1844 reader->resume[XmlReadResume_Local] = start;
1845 return E_PENDING;
1847 else
1848 reader->resume[XmlReadResume_Local] = 0;
1850 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1852 return S_OK;
/* [7 NS]  QName ::= PrefixedName | UnprefixedName
   [8 NS]  PrefixedName ::= Prefix ':' LocalPart
   [9 NS]  UnprefixedName ::= LocalPart
   [10 NS] Prefix ::= NCName

   Resumable through two saved offsets: resume[XmlReadResume_Name] is the
   start of the whole name, resume[XmlReadResume_Local] the start of a local
   part that was interrupted.  Both are cleared on success.

   Outputs: *prefix (zero length when there is none), *local, and *qname
   spanning prefix, colon and local part.

   Returns NC_E_QNAMECHARACTER when a fresh name does not begin with an
   NCName character, E_PENDING when the reader is pending, the failure code
   of reader_parse_local(), or S_OK. */
static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    WCHAR *ptr;
    UINT start;
    HRESULT hr;

    if (reader->resume[XmlReadResume_Name])
    {
        start = reader->resume[XmlReadResume_Name];
        ptr = reader_get_ptr(reader);
    }
    else
    {
        ptr = reader_get_ptr(reader);
        start = reader_get_cur(reader);
        /* saved right away, the code below reads the name start from here */
        reader->resume[XmlReadResume_Name] = start;
        if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
    }

    if (reader->resume[XmlReadResume_Local])
    {
        /* the prefix and ':' were consumed by an earlier call, finish the local part */
        hr = reader_parse_local(reader, local);
        if (FAILED(hr)) return hr;

        /* prefix runs from the name start up to the ':' right before the local part */
        reader_init_strvalue(reader->resume[XmlReadResume_Name],
                             local->start - reader->resume[XmlReadResume_Name] - 1,
                             prefix);
    }
    else
    {
        /* skip prefix part */
        while (is_ncnamechar(*ptr))
        {
            reader_skipn(reader, 1);
            ptr = reader_get_ptr(reader);
        }

        if (is_reader_pending(reader)) return E_PENDING;

        /* got a qualified name */
        if (*ptr == ':')
        {
            reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);

            /* skip ':' */
            reader_skipn(reader, 1);
            hr = reader_parse_local(reader, local);
            if (FAILED(hr)) return hr;
        }
        else
        {
            /* no colon, what was scanned is the local part and the prefix is empty */
            reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
            reader_init_strvalue(0, 0, prefix);
        }
    }

    if (prefix->len)
        TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
    else
        TRACE("ncname %s\n", debug_strval(reader, local));

    reader_init_strvalue(prefix->len ? prefix->start : local->start,
                        /* count ':' too */
                        (prefix->len ? prefix->len + 1 : 0) + local->len,
                         qname);

    reader->resume[XmlReadResume_Name] = 0;
    reader->resume[XmlReadResume_Local] = 0;

    return S_OK;
}
1931 /* Applies normalization rules to a single char, used for attribute values.
1933 Rules include 2 steps:
1935 1) replacing \r\n with a single \n;
1936 2) replacing all whitespace chars with ' '.
1939 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1941 encoded_buffer *buffer = &reader->input->buffer->utf16;
1943 if (!is_wchar_space(*ptr)) return;
1945 if (*ptr == '\r' && *(ptr+1) == '\n')
1947 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1948 memmove(ptr+1, ptr+2, len);
1950 *ptr = ' ';
1953 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1955 static const WCHAR entltW[] = {'l','t'};
1956 static const WCHAR entgtW[] = {'g','t'};
1957 static const WCHAR entampW[] = {'a','m','p'};
1958 static const WCHAR entaposW[] = {'a','p','o','s'};
1959 static const WCHAR entquotW[] = {'q','u','o','t'};
1960 static const strval lt = { (WCHAR*)entltW, 2 };
1961 static const strval gt = { (WCHAR*)entgtW, 2 };
1962 static const strval amp = { (WCHAR*)entampW, 3 };
1963 static const strval apos = { (WCHAR*)entaposW, 4 };
1964 static const strval quot = { (WCHAR*)entquotW, 4 };
1965 WCHAR *str = reader_get_strptr(reader, name);
1967 switch (*str)
1969 case 'l':
1970 if (strval_eq(reader, name, &lt)) return '<';
1971 break;
1972 case 'g':
1973 if (strval_eq(reader, name, &gt)) return '>';
1974 break;
1975 case 'a':
1976 if (strval_eq(reader, name, &amp))
1977 return '&';
1978 else if (strval_eq(reader, name, &apos))
1979 return '\'';
1980 break;
1981 case 'q':
1982 if (strval_eq(reader, name, &quot)) return '\"';
1983 break;
1984 default:
1988 return 0;
/* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
   [67] Reference ::= EntityRef | CharRef
   [68] EntityRef ::= '&' Name ';'

   Called with the reader positioned on '&'.  A successfully parsed reference
   is replaced in the utf16 buffer, in place, by the single character it
   stands for, and the reader is left right after that character.

   Returns:
     WC_E_HEXDIGIT / WC_E_DIGIT - unexpected character while the accumulated
                                  value is still zero;
     WC_E_SEMICOLON             - unexpected character instead of ';';
     WC_E_XMLCHARACTER          - referenced value rejected by is_char();
     WC_E_UNDECLAREDENTITY      - entity name is not one of the predefined ones;
     failure code of reader_parse_name(); S_OK on success. */
static HRESULT reader_parse_reference(xmlreader *reader)
{
    encoded_buffer *buffer = &reader->input->buffer->utf16;
    WCHAR *start = reader_get_ptr(reader), *ptr;
    UINT cur = reader_get_cur(reader);
    WCHAR ch = 0;
    int len;

    /* skip '&' */
    reader_skipn(reader, 1);
    ptr = reader_get_ptr(reader);

    if (*ptr == '#')
    {
        reader_skipn(reader, 1);
        ptr = reader_get_ptr(reader);

        /* NOTE(review): the value is accumulated in a WCHAR, so a number too
           large for it looks like it would wrap before the is_char() check
           below - confirm. */

        /* hex char or decimal */
        if (*ptr == 'x')
        {
            reader_skipn(reader, 1);
            ptr = reader_get_ptr(reader);

            while (*ptr != ';')
            {
                if ((*ptr >= '0' && *ptr <= '9'))
                    ch = ch*16 + *ptr - '0';
                else if ((*ptr >= 'a' && *ptr <= 'f'))
                    ch = ch*16 + *ptr - 'a' + 10;
                else if ((*ptr >= 'A' && *ptr <= 'F'))
                    ch = ch*16 + *ptr - 'A' + 10;
                else
                    return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
                reader_skipn(reader, 1);
                ptr = reader_get_ptr(reader);
            }
        }
        else
        {
            while (*ptr != ';')
            {
                if ((*ptr >= '0' && *ptr <= '9'))
                {
                    ch = ch*10 + *ptr - '0';
                    reader_skipn(reader, 1);
                    ptr = reader_get_ptr(reader);
                }
                else
                    return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
            }
        }

        if (!is_char(ch)) return WC_E_XMLCHARACTER;

        /* normalize */
        if (is_wchar_space(ch)) ch = ' ';

        /* ptr is on ';' here; pull everything after it down to the slot
           right after the original '&' position */
        ptr = reader_get_ptr(reader);
        start = reader_get_ptr2(reader, cur);
        len = buffer->written - ((char *)ptr - buffer->data);
        memmove(start + 1, ptr + 1, len);

        /* account for the removed characters and continue after the replacement */
        buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
        buffer->cur = cur + 1;

        *start = ch;
    }
    else
    {
        strval name;
        HRESULT hr;

        hr = reader_parse_name(reader, &name);
        if (FAILED(hr)) return hr;

        ptr = reader_get_ptr(reader);
        if (*ptr != ';') return WC_E_SEMICOLON;

        /* predefined entities resolve to a single character */
        ch = get_predefined_entity(reader, &name);
        if (ch)
        {
            /* NOTE(review): unlike the character reference branch above,
               buffer->written is not reduced here and start is not
               re-fetched after parsing - confirm this asymmetry is intended. */
            len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
            memmove(start+1, ptr+1, len);
            buffer->cur = cur + 1;

            *start = ch;
        }
        else
        {
            FIXME("undeclared entity %s\n", debug_strval(reader, &name));
            return WC_E_UNDECLAREDENTITY;
        }
    }

    return S_OK;
}
2093 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2094 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2096 WCHAR *ptr, quote;
2097 UINT start;
2099 ptr = reader_get_ptr(reader);
2101 /* skip opening quote */
2102 quote = *ptr;
2103 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2104 reader_skipn(reader, 1);
2106 ptr = reader_get_ptr(reader);
2107 start = reader_get_cur(reader);
2108 while (*ptr)
2110 if (*ptr == '<') return WC_E_LESSTHAN;
2112 if (*ptr == quote)
2114 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2115 /* skip closing quote */
2116 reader_skipn(reader, 1);
2117 return S_OK;
2120 if (*ptr == '&')
2122 HRESULT hr = reader_parse_reference(reader);
2123 if (FAILED(hr)) return hr;
2125 else
2127 reader_normalize_space(reader, ptr);
2128 reader_skipn(reader, 1);
2130 ptr = reader_get_ptr(reader);
2133 return WC_E_QUOTE;
2136 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2137 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2138 [3 NS] DefaultAttName ::= 'xmlns'
2139 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2140 static HRESULT reader_parse_attribute(xmlreader *reader)
2142 strval prefix, local, qname, value;
2143 BOOL ns = FALSE, nsdef = FALSE;
2144 HRESULT hr;
2146 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2147 if (FAILED(hr)) return hr;
2149 if (strval_eq(reader, &prefix, &strval_xmlns))
2150 ns = TRUE;
2152 if (strval_eq(reader, &qname, &strval_xmlns))
2153 ns = nsdef = TRUE;
2155 hr = reader_parse_eq(reader);
2156 if (FAILED(hr)) return hr;
2158 hr = reader_parse_attvalue(reader, &value);
2159 if (FAILED(hr)) return hr;
2161 if (ns)
2162 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2164 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2165 return reader_add_attr(reader, &prefix, &local, &qname, &value);
2168 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2169 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2170 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2172 HRESULT hr;
2174 hr = reader_parse_qname(reader, prefix, local, qname);
2175 if (FAILED(hr)) return hr;
2177 while (1)
2179 static const WCHAR endW[] = {'/','>',0};
2181 reader_skipspaces(reader);
2183 /* empty element */
2184 if ((*empty = !reader_cmp(reader, endW)))
2186 /* skip '/>' */
2187 reader_skipn(reader, 2);
2188 reader->is_empty_element = TRUE;
2189 reader->empty_element.prefix = *prefix;
2190 reader->empty_element.localname = *local;
2191 reader->empty_element.qname = *qname;
2192 reader_mark_ns_nodes(reader, &reader->empty_element);
2193 return S_OK;
2196 /* got a start tag */
2197 if (!reader_cmp(reader, gtW))
2199 /* skip '>' */
2200 reader_skipn(reader, 1);
2201 return reader_push_element(reader, prefix, local, qname);
2204 hr = reader_parse_attribute(reader);
2205 if (FAILED(hr)) return hr;
2208 return S_OK;
/* [39] element ::= EmptyElemTag | STag content ETag

   Parses a start tag or an empty element tag and makes it the current node.
   Resumable: in the Initial state '<' is checked and skipped first, in the
   STag state parsing continues with the tag itself.

   Returns S_FALSE when not positioned on '<', the failure code of
   reader_parse_stag(), E_FAIL for any other resume state, or the success
   value returned by reader_parse_stag(). */
static HRESULT reader_parse_element(xmlreader *reader)
{
    HRESULT hr;

    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        /* check if we are really on element */
        if (reader_cmp(reader, ltW)) return S_FALSE;

        /* skip '<' */
        reader_skipn(reader, 1);

        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_STag;
        /* fallthrough */
    case XmlReadResumeState_STag:
    {
        strval qname, prefix, local;
        int empty = 0;

        /* this handles empty elements too */
        hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
        if (FAILED(hr)) return hr;

        /* FIXME: need to check for defined namespace to reject invalid prefix */

        /* if we got empty element and stack is empty go straight to Misc */
        if (empty && list_empty(&reader->elements))
            reader->instate = XmlReadInState_MiscEnd;
        else
            reader->instate = XmlReadInState_Content;

        /* publish the element as current node, the tag is fully parsed now */
        reader->nodetype = XmlNodeType_Element;
        reader->resumestate = XmlReadResumeState_Initial;
        reader_set_strvalue(reader, StringValue_Prefix, &prefix);
        reader_set_strvalue(reader, StringValue_LocalName, &local);
        reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        break;
    }
    default:
        /* no other resume state is expected here */
        hr = E_FAIL;
    }

    return hr;
}
2259 /* [13 NS] ETag ::= '</' QName S? '>' */
2260 static HRESULT reader_parse_endtag(xmlreader *reader)
2262 strval prefix, local, qname;
2263 struct element *elem;
2264 HRESULT hr;
2266 /* skip '</' */
2267 reader_skipn(reader, 2);
2269 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2270 if (FAILED(hr)) return hr;
2272 reader_skipspaces(reader);
2274 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2276 /* skip '>' */
2277 reader_skipn(reader, 1);
2279 /* Element stack should never be empty at this point, cause we shouldn't get to
2280 content parsing if it's empty. */
2281 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2282 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2284 reader->nodetype = XmlNodeType_EndElement;
2285 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2286 reader_set_strvalue(reader, StringValue_LocalName, &local);
2287 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2289 return S_OK;
2292 /* [18] CDSect ::= CDStart CData CDEnd
2293 [19] CDStart ::= '<![CDATA['
2294 [20] CData ::= (Char* - (Char* ']]>' Char*))
2295 [21] CDEnd ::= ']]>' */
2296 static HRESULT reader_parse_cdata(xmlreader *reader)
2298 WCHAR *ptr;
2299 UINT start;
2301 if (reader->resumestate == XmlReadResumeState_CDATA)
2303 start = reader->resume[XmlReadResume_Body];
2304 ptr = reader_get_ptr(reader);
2306 else
2308 /* skip markup '<![CDATA[' */
2309 reader_skipn(reader, 9);
2310 reader_shrink(reader);
2311 ptr = reader_get_ptr(reader);
2312 start = reader_get_cur(reader);
2313 reader->nodetype = XmlNodeType_CDATA;
2314 reader->resume[XmlReadResume_Body] = start;
2315 reader->resumestate = XmlReadResumeState_CDATA;
2316 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2317 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2318 reader_set_strvalue(reader, StringValue_Value, NULL);
2321 while (*ptr)
2323 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2325 strval value;
2327 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2329 /* skip ']]>' */
2330 reader_skipn(reader, 3);
2331 TRACE("%s\n", debug_strval(reader, &value));
2333 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2334 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2335 reader_set_strvalue(reader, StringValue_Value, &value);
2336 reader->resume[XmlReadResume_Body] = 0;
2337 reader->resumestate = XmlReadResumeState_Initial;
2338 return S_OK;
2340 else
2342 /* Value normalization is not fully implemented, rules are:
2344 - single '\r' -> '\n';
2345 - sequence '\r\n' -> '\n', in this case value length changes;
2347 if (*ptr == '\r') *ptr = '\n';
2348 reader_skipn(reader, 1);
2349 ptr++;
2353 return S_OK;
2356 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2357 static HRESULT reader_parse_chardata(xmlreader *reader)
2359 WCHAR *ptr;
2360 UINT start;
2362 if (reader->resumestate == XmlReadResumeState_CharData)
2364 start = reader->resume[XmlReadResume_Body];
2365 ptr = reader_get_ptr(reader);
2367 else
2369 reader_shrink(reader);
2370 ptr = reader_get_ptr(reader);
2371 start = reader_get_cur(reader);
2372 /* There's no text */
2373 if (!*ptr || *ptr == '<') return S_OK;
2374 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2375 reader->resume[XmlReadResume_Body] = start;
2376 reader->resumestate = XmlReadResumeState_CharData;
2377 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2378 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2379 reader_set_strvalue(reader, StringValue_Value, NULL);
2382 while (*ptr)
2384 static const WCHAR ampW[] = {'&',0};
2386 /* CDATA closing sequence ']]>' is not allowed */
2387 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2388 return WC_E_CDSECTEND;
2390 /* Found next markup part */
2391 if (ptr[0] == '<')
2393 strval value;
2395 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2396 reader_set_strvalue(reader, StringValue_Value, &value);
2397 reader->resume[XmlReadResume_Body] = 0;
2398 reader->resumestate = XmlReadResumeState_Initial;
2399 return S_OK;
2402 /* this covers a case when text has leading whitespace chars */
2403 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2405 if (!reader_cmp(reader, ampW))
2406 reader_parse_reference(reader);
2407 else
2408 reader_skipn(reader, 1);
2410 ptr = reader_get_ptr(reader);
2413 return S_OK;
2416 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2417 static HRESULT reader_parse_content(xmlreader *reader)
2419 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2420 static const WCHAR etagW[] = {'<','/',0};
2422 if (reader->resumestate != XmlReadResumeState_Initial)
2424 switch (reader->resumestate)
2426 case XmlReadResumeState_CDATA:
2427 return reader_parse_cdata(reader);
2428 case XmlReadResumeState_Comment:
2429 return reader_parse_comment(reader);
2430 case XmlReadResumeState_PIBody:
2431 case XmlReadResumeState_PITarget:
2432 return reader_parse_pi(reader);
2433 case XmlReadResumeState_CharData:
2434 return reader_parse_chardata(reader);
2435 default:
2436 ERR("unknown resume state %d\n", reader->resumestate);
2440 reader_shrink(reader);
2442 /* handle end tag here, it indicates end of content as well */
2443 if (!reader_cmp(reader, etagW))
2444 return reader_parse_endtag(reader);
2446 if (!reader_cmp(reader, commentW))
2447 return reader_parse_comment(reader);
2449 if (!reader_cmp(reader, piW))
2450 return reader_parse_pi(reader);
2452 if (!reader_cmp(reader, cdstartW))
2453 return reader_parse_cdata(reader);
2455 if (!reader_cmp(reader, ltW))
2456 return reader_parse_element(reader);
2458 /* what's left must be CharData */
2459 return reader_parse_chardata(reader);
2462 static HRESULT reader_parse_nextnode(xmlreader *reader)
2464 XmlNodeType nodetype = reader_get_nodetype(reader);
2465 HRESULT hr;
2467 if (!is_reader_pending(reader))
2468 reader_clear_attrs(reader);
2470 /* When moving from EndElement or empty element, pop its own namespace definitions */
2471 switch (nodetype)
2473 case XmlNodeType_Attribute:
2474 reader_dec_depth(reader);
2475 /* fallthrough */
2476 case XmlNodeType_Element:
2477 if (reader->is_empty_element)
2478 reader_pop_ns_nodes(reader, &reader->empty_element);
2479 else if (FAILED(hr = reader_inc_depth(reader)))
2480 return hr;
2481 break;
2482 case XmlNodeType_EndElement:
2483 reader_pop_element(reader);
2484 reader_dec_depth(reader);
2485 break;
2486 default:
2490 while (1)
2492 switch (reader->instate)
2494 /* if it's a first call for a new input we need to detect stream encoding */
2495 case XmlReadInState_Initial:
2497 xml_encoding enc;
2499 hr = readerinput_growraw(reader->input);
2500 if (FAILED(hr)) return hr;
2502 /* try to detect encoding by BOM or data and set input code page */
2503 hr = readerinput_detectencoding(reader->input, &enc);
2504 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2505 debugstr_w(xml_encoding_map[enc].name), hr);
2506 if (FAILED(hr)) return hr;
2508 /* always switch first time cause we have to put something in */
2509 readerinput_switchencoding(reader->input, enc);
2511 /* parse xml declaration */
2512 hr = reader_parse_xmldecl(reader);
2513 if (FAILED(hr)) return hr;
2515 readerinput_shrinkraw(reader->input, -1);
2516 reader->instate = XmlReadInState_Misc_DTD;
2517 if (hr == S_OK) return hr;
2519 break;
2520 case XmlReadInState_Misc_DTD:
2521 hr = reader_parse_misc(reader);
2522 if (FAILED(hr)) return hr;
2524 if (hr == S_FALSE)
2525 reader->instate = XmlReadInState_DTD;
2526 else
2527 return hr;
2528 break;
2529 case XmlReadInState_DTD:
2530 hr = reader_parse_dtd(reader);
2531 if (FAILED(hr)) return hr;
2533 if (hr == S_OK)
2535 reader->instate = XmlReadInState_DTD_Misc;
2536 return hr;
2538 else
2539 reader->instate = XmlReadInState_Element;
2540 break;
2541 case XmlReadInState_DTD_Misc:
2542 hr = reader_parse_misc(reader);
2543 if (FAILED(hr)) return hr;
2545 if (hr == S_FALSE)
2546 reader->instate = XmlReadInState_Element;
2547 else
2548 return hr;
2549 break;
2550 case XmlReadInState_Element:
2551 return reader_parse_element(reader);
2552 case XmlReadInState_Content:
2553 return reader_parse_content(reader);
2554 case XmlReadInState_MiscEnd:
2555 hr = reader_parse_misc(reader);
2556 if (FAILED(hr)) return hr;
2558 if (hr == S_FALSE)
2560 reader->instate = XmlReadInState_Eof;
2561 reader->state = XmlReadState_EndOfFile;
2562 reader->nodetype = XmlNodeType_None;
2564 return hr;
2565 case XmlReadInState_Eof:
2566 return S_FALSE;
2567 default:
2568 FIXME("internal state %d not handled\n", reader->instate);
2569 return E_NOTIMPL;
2573 return E_NOTIMPL;
2576 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2578 xmlreader *This = impl_from_IXmlReader(iface);
2580 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2582 if (IsEqualGUID(riid, &IID_IUnknown) ||
2583 IsEqualGUID(riid, &IID_IXmlReader))
2585 *ppvObject = iface;
2587 else
2589 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2590 *ppvObject = NULL;
2591 return E_NOINTERFACE;
2594 IXmlReader_AddRef(iface);
2596 return S_OK;
2599 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2601 xmlreader *This = impl_from_IXmlReader(iface);
2602 ULONG ref = InterlockedIncrement(&This->ref);
2603 TRACE("(%p)->(%d)\n", This, ref);
2604 return ref;
2607 static void reader_clear_ns(xmlreader *reader)
2609 struct ns *ns, *ns2;
2611 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2612 reader_free_strvalued(reader, &ns->prefix);
2613 reader_free_strvalued(reader, &ns->uri);
2614 reader_free(reader, ns);
2617 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2618 reader_free_strvalued(reader, &ns->uri);
2619 reader_free(reader, ns);
2623 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2625 xmlreader *This = impl_from_IXmlReader(iface);
2626 LONG ref = InterlockedDecrement(&This->ref);
2628 TRACE("(%p)->(%d)\n", This, ref);
2630 if (ref == 0)
2632 IMalloc *imalloc = This->imalloc;
2633 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2634 if (This->resolver) IXmlResolver_Release(This->resolver);
2635 if (This->mlang) IUnknown_Release(This->mlang);
2636 reader_clear_attrs(This);
2637 reader_clear_ns(This);
2638 reader_clear_elements(This);
2639 reader_free_strvalues(This);
2640 reader_free(This, This);
2641 if (imalloc) IMalloc_Release(imalloc);
2644 return ref;
2647 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2649 xmlreader *This = impl_from_IXmlReader(iface);
2650 IXmlReaderInput *readerinput;
2651 HRESULT hr;
2653 TRACE("(%p)->(%p)\n", This, input);
2655 if (This->input)
2657 readerinput_release_stream(This->input);
2658 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2659 This->input = NULL;
2662 This->line = This->pos = 0;
2663 reader_clear_elements(This);
2664 This->depth = 0;
2665 This->nodetype = XmlNodeType_None;
2666 This->resumestate = XmlReadResumeState_Initial;
2667 memset(This->resume, 0, sizeof(This->resume));
2669 /* just reset current input */
2670 if (!input)
2672 This->state = XmlReadState_Initial;
2673 return S_OK;
2676 /* now try IXmlReaderInput, ISequentialStream, IStream */
2677 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2678 if (hr == S_OK)
2680 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2681 This->input = impl_from_IXmlReaderInput(readerinput);
2682 else
2684 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2685 readerinput, readerinput->lpVtbl);
2686 IUnknown_Release(readerinput);
2687 return E_FAIL;
2692 if (hr != S_OK || !readerinput)
2694 /* create IXmlReaderInput basing on supplied interface */
2695 hr = CreateXmlReaderInputWithEncodingName(input,
2696 This->imalloc, NULL, FALSE, NULL, &readerinput);
2697 if (hr != S_OK) return hr;
2698 This->input = impl_from_IXmlReaderInput(readerinput);
2701 /* set stream for supplied IXmlReaderInput */
2702 hr = readerinput_query_for_stream(This->input);
2703 if (hr == S_OK)
2705 This->state = XmlReadState_Initial;
2706 This->instate = XmlReadInState_Initial;
2709 return hr;
2712 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2714 xmlreader *This = impl_from_IXmlReader(iface);
2716 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2718 if (!value) return E_INVALIDARG;
2720 switch (property)
2722 case XmlReaderProperty_MultiLanguage:
2723 *value = (LONG_PTR)This->mlang;
2724 if (This->mlang)
2725 IUnknown_AddRef(This->mlang);
2726 break;
2727 case XmlReaderProperty_XmlResolver:
2728 *value = (LONG_PTR)This->resolver;
2729 if (This->resolver)
2730 IXmlResolver_AddRef(This->resolver);
2731 break;
2732 case XmlReaderProperty_DtdProcessing:
2733 *value = This->dtdmode;
2734 break;
2735 case XmlReaderProperty_ReadState:
2736 *value = This->state;
2737 break;
2738 case XmlReaderProperty_MaxElementDepth:
2739 *value = This->max_depth;
2740 break;
2741 default:
2742 FIXME("Unimplemented property (%u)\n", property);
2743 return E_NOTIMPL;
2746 return S_OK;
2749 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2751 xmlreader *This = impl_from_IXmlReader(iface);
2753 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2755 switch (property)
2757 case XmlReaderProperty_MultiLanguage:
2758 if (This->mlang)
2759 IUnknown_Release(This->mlang);
2760 This->mlang = (IUnknown*)value;
2761 if (This->mlang)
2762 IUnknown_AddRef(This->mlang);
2763 if (This->mlang)
2764 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2765 break;
2766 case XmlReaderProperty_XmlResolver:
2767 if (This->resolver)
2768 IXmlResolver_Release(This->resolver);
2769 This->resolver = (IXmlResolver*)value;
2770 if (This->resolver)
2771 IXmlResolver_AddRef(This->resolver);
2772 break;
2773 case XmlReaderProperty_DtdProcessing:
2774 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2775 This->dtdmode = value;
2776 break;
2777 case XmlReaderProperty_MaxElementDepth:
2778 This->max_depth = value;
2779 break;
2780 default:
2781 FIXME("Unimplemented property (%u)\n", property);
2782 return E_NOTIMPL;
2785 return S_OK;
2788 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2790 xmlreader *This = impl_from_IXmlReader(iface);
2791 XmlNodeType oldtype = This->nodetype;
2792 HRESULT hr;
2794 TRACE("(%p)->(%p)\n", This, nodetype);
2796 if (This->state == XmlReadState_Closed) return S_FALSE;
2798 hr = reader_parse_nextnode(This);
2799 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2800 This->state = XmlReadState_Interactive;
2802 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2803 if (nodetype)
2804 *nodetype = This->nodetype;
2806 return hr;
2809 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2811 xmlreader *This = impl_from_IXmlReader(iface);
2813 TRACE("(%p)->(%p)\n", This, node_type);
2815 if (!node_type)
2816 return E_INVALIDARG;
2818 *node_type = reader_get_nodetype(This);
2819 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2822 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2824 if (!reader->attr_count)
2825 return S_FALSE;
2827 if (!reader->attr)
2828 reader_inc_depth(reader);
2830 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2831 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2832 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2833 reader_set_strvalue(reader, StringValue_QualifiedName, &reader->attr->qname);
2834 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2836 return S_OK;
2839 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2841 xmlreader *This = impl_from_IXmlReader(iface);
2843 TRACE("(%p)\n", This);
2845 return reader_move_to_first_attribute(This);
2848 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2850 xmlreader *This = impl_from_IXmlReader(iface);
2851 const struct list *next;
2853 TRACE("(%p)\n", This);
2855 if (!This->attr_count) return S_FALSE;
2857 if (!This->attr)
2858 return reader_move_to_first_attribute(This);
2860 next = list_next(&This->attrs, &This->attr->entry);
2861 if (next)
2863 This->attr = LIST_ENTRY(next, struct attribute, entry);
2864 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2865 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2866 reader_set_strvalue(This, StringValue_QualifiedName, &This->attr->qname);
2867 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2870 return next ? S_OK : S_FALSE;
2873 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2874 LPCWSTR local_name,
2875 LPCWSTR namespaceUri)
2877 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2878 return E_NOTIMPL;
2881 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2883 xmlreader *This = impl_from_IXmlReader(iface);
2885 TRACE("(%p)\n", This);
2887 if (!This->attr_count) return S_FALSE;
2889 if (This->attr)
2890 reader_dec_depth(This);
2892 This->attr = NULL;
2894 /* FIXME: support other node types with 'attributes' like DTD */
2895 if (This->is_empty_element) {
2896 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
2897 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2898 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2900 else {
2901 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2902 if (element) {
2903 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
2904 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2905 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2908 reader_set_strvalue(This, StringValue_Value, &strval_empty);
2910 return S_OK;
2913 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2915 xmlreader *This = impl_from_IXmlReader(iface);
2917 TRACE("(%p)->(%p %p)\n", This, name, len);
2918 *name = This->strvalues[StringValue_QualifiedName].str;
2919 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2920 return S_OK;
2923 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2925 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2926 struct ns *ns;
2928 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2929 if (strval_eq(reader, prefix, &ns->prefix))
2930 return ns;
2933 return NULL;
2936 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2938 if (list_empty(&reader->nsdef))
2939 return NULL;
2941 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2944 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2946 xmlreader *This = impl_from_IXmlReader(iface);
2947 const strval *prefix = &This->strvalues[StringValue_Prefix];
2948 XmlNodeType nodetype;
2949 struct ns *ns;
2950 UINT length;
2952 TRACE("(%p %p %p)\n", iface, uri, len);
2954 if (!len)
2955 len = &length;
2957 *uri = NULL;
2958 *len = 0;
2960 switch ((nodetype = reader_get_nodetype(This)))
2962 case XmlNodeType_Attribute:
2964 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2965 '2','0','0','0','/','x','m','l','n','s','/',0};
2966 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2967 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2968 const strval *local = &This->strvalues[StringValue_LocalName];
2970 /* check for reserved prefixes first */
2971 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2972 strval_eq(This, prefix, &strval_xmlns))
2974 *uri = xmlns_uriW;
2975 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2977 else if (strval_eq(This, prefix, &strval_xml)) {
2978 *uri = xml_uriW;
2979 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2982 if (!*uri) {
2983 ns = reader_lookup_ns(This, prefix);
2984 if (ns) {
2985 *uri = ns->uri.str;
2986 *len = ns->uri.len;
2988 else {
2989 *uri = emptyW;
2990 *len = 0;
2994 break;
2995 case XmlNodeType_Element:
2996 case XmlNodeType_EndElement:
2998 ns = reader_lookup_ns(This, prefix);
3000 /* pick top default ns if any */
3001 if (!ns)
3002 ns = reader_lookup_nsdef(This);
3004 if (ns) {
3005 *uri = ns->uri.str;
3006 *len = ns->uri.len;
3008 else {
3009 *uri = emptyW;
3010 *len = 0;
3013 break;
3014 default:
3015 FIXME("Unhandled node type %d\n", nodetype);
3016 return E_NOTIMPL;
3019 return S_OK;
3022 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3024 xmlreader *This = impl_from_IXmlReader(iface);
3026 TRACE("(%p)->(%p %p)\n", This, name, len);
3027 *name = This->strvalues[StringValue_LocalName].str;
3028 if (len) *len = This->strvalues[StringValue_LocalName].len;
3029 return S_OK;
3032 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
3034 xmlreader *This = impl_from_IXmlReader(iface);
3036 TRACE("(%p)->(%p %p)\n", This, prefix, len);
3037 *prefix = This->strvalues[StringValue_Prefix].str;
3038 if (len) *len = This->strvalues[StringValue_Prefix].len;
3039 return S_OK;
3042 static BOOL is_namespace_definition(xmlreader *reader)
3044 const strval *local = &reader->strvalues[StringValue_LocalName];
3045 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3047 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3048 return FALSE;
3050 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3051 strval_eq(reader, prefix, &strval_xmlns));
3054 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3056 xmlreader *reader = impl_from_IXmlReader(iface);
3057 strval *val = &reader->strvalues[StringValue_Value];
3059 TRACE("(%p)->(%p %p)\n", reader, value, len);
3061 *value = NULL;
3063 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3065 XmlNodeType type;
3066 HRESULT hr;
3068 hr = IXmlReader_Read(iface, &type);
3069 if (FAILED(hr)) return hr;
3071 /* return if still pending, partially read values are not reported */
3072 if (is_reader_pending(reader)) return E_PENDING;
3075 if (!val->str)
3077 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3078 if (!ptr) return E_OUTOFMEMORY;
3079 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3080 ptr[val->len] = 0;
3081 val->str = ptr;
3084 /* For namespace definition attributes return values from namespace list */
3085 if (is_namespace_definition(reader)) {
3086 const strval *local = &reader->strvalues[StringValue_LocalName];
3087 struct ns *ns;
3089 ns = reader_lookup_ns(reader, local);
3090 if (!ns)
3091 ns = reader_lookup_nsdef(reader);
3093 val = &ns->uri;
3096 *value = val->str;
3097 if (len) *len = val->len;
3098 return S_OK;
3101 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3103 xmlreader *reader = impl_from_IXmlReader(iface);
3104 strval *val = &reader->strvalues[StringValue_Value];
3105 UINT len;
3107 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3109 /* Value is already allocated, chunked reads are not possible. */
3110 if (val->str) return S_FALSE;
3112 if (val->len)
3114 len = min(chunk_size, val->len);
3115 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3116 val->start += len;
3117 val->len -= len;
3118 if (read) *read = len;
3121 return S_OK;
3124 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3125 LPCWSTR *baseUri,
3126 UINT *baseUri_length)
3128 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3129 return E_NOTIMPL;
3132 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3134 FIXME("(%p): stub\n", iface);
3135 return FALSE;
3138 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3140 xmlreader *This = impl_from_IXmlReader(iface);
3141 TRACE("(%p)\n", This);
3142 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3143 when current node is start tag of an element */
3144 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3147 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3149 xmlreader *This = impl_from_IXmlReader(iface);
3151 TRACE("(%p %p)\n", This, lineNumber);
3153 if (!lineNumber) return E_INVALIDARG;
3155 *lineNumber = This->line;
3157 return S_OK;
3160 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3162 xmlreader *This = impl_from_IXmlReader(iface);
3164 TRACE("(%p %p)\n", This, linePosition);
3166 if (!linePosition) return E_INVALIDARG;
3168 *linePosition = This->pos;
3170 return S_OK;
3173 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3175 xmlreader *This = impl_from_IXmlReader(iface);
3177 TRACE("(%p)->(%p)\n", This, count);
3179 if (!count) return E_INVALIDARG;
3181 *count = This->attr_count;
3182 return S_OK;
3185 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3187 xmlreader *This = impl_from_IXmlReader(iface);
3188 TRACE("(%p)->(%p)\n", This, depth);
3189 *depth = This->depth;
3190 return S_OK;
3193 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3195 xmlreader *This = impl_from_IXmlReader(iface);
3196 TRACE("(%p)\n", iface);
3197 return This->state == XmlReadState_EndOfFile;
3200 static const struct IXmlReaderVtbl xmlreader_vtbl =
3202 xmlreader_QueryInterface,
3203 xmlreader_AddRef,
3204 xmlreader_Release,
3205 xmlreader_SetInput,
3206 xmlreader_GetProperty,
3207 xmlreader_SetProperty,
3208 xmlreader_Read,
3209 xmlreader_GetNodeType,
3210 xmlreader_MoveToFirstAttribute,
3211 xmlreader_MoveToNextAttribute,
3212 xmlreader_MoveToAttributeByName,
3213 xmlreader_MoveToElement,
3214 xmlreader_GetQualifiedName,
3215 xmlreader_GetNamespaceUri,
3216 xmlreader_GetLocalName,
3217 xmlreader_GetPrefix,
3218 xmlreader_GetValue,
3219 xmlreader_ReadValueChunk,
3220 xmlreader_GetBaseUri,
3221 xmlreader_IsDefault,
3222 xmlreader_IsEmptyElement,
3223 xmlreader_GetLineNumber,
3224 xmlreader_GetLinePosition,
3225 xmlreader_GetAttributeCount,
3226 xmlreader_GetDepth,
3227 xmlreader_IsEOF
3230 /** IXmlReaderInput **/
3231 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3233 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3235 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3237 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3238 IsEqualGUID(riid, &IID_IUnknown))
3240 *ppvObject = iface;
3242 else
3244 WARN("interface %s not implemented\n", debugstr_guid(riid));
3245 *ppvObject = NULL;
3246 return E_NOINTERFACE;
3249 IUnknown_AddRef(iface);
3251 return S_OK;
3254 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3256 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3257 ULONG ref = InterlockedIncrement(&This->ref);
3258 TRACE("(%p)->(%d)\n", This, ref);
3259 return ref;
3262 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3264 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3265 LONG ref = InterlockedDecrement(&This->ref);
3267 TRACE("(%p)->(%d)\n", This, ref);
3269 if (ref == 0)
3271 IMalloc *imalloc = This->imalloc;
3272 if (This->input) IUnknown_Release(This->input);
3273 if (This->stream) ISequentialStream_Release(This->stream);
3274 if (This->buffer) free_input_buffer(This->buffer);
3275 readerinput_free(This, This->baseuri);
3276 readerinput_free(This, This);
3277 if (imalloc) IMalloc_Release(imalloc);
3280 return ref;
3283 static const struct IUnknownVtbl xmlreaderinputvtbl =
3285 xmlreaderinput_QueryInterface,
3286 xmlreaderinput_AddRef,
3287 xmlreaderinput_Release
3290 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3292 xmlreader *reader;
3293 int i;
3295 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3297 if (!IsEqualGUID(riid, &IID_IXmlReader))
3299 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3300 return E_FAIL;
3303 if (imalloc)
3304 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3305 else
3306 reader = heap_alloc(sizeof(*reader));
3307 if(!reader) return E_OUTOFMEMORY;
3309 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3310 reader->ref = 1;
3311 reader->input = NULL;
3312 reader->state = XmlReadState_Closed;
3313 reader->instate = XmlReadInState_Initial;
3314 reader->resumestate = XmlReadResumeState_Initial;
3315 reader->dtdmode = DtdProcessing_Prohibit;
3316 reader->resolver = NULL;
3317 reader->mlang = NULL;
3318 reader->line = reader->pos = 0;
3319 reader->imalloc = imalloc;
3320 if (imalloc) IMalloc_AddRef(imalloc);
3321 reader->nodetype = XmlNodeType_None;
3322 list_init(&reader->attrs);
3323 reader->attr_count = 0;
3324 reader->attr = NULL;
3325 list_init(&reader->nsdef);
3326 list_init(&reader->ns);
3327 list_init(&reader->elements);
3328 reader->depth = 0;
3329 reader->max_depth = 256;
3330 reader->is_empty_element = FALSE;
3331 memset(reader->resume, 0, sizeof(reader->resume));
3333 for (i = 0; i < StringValue_Last; i++)
3334 reader->strvalues[i] = strval_empty;
3336 *obj = &reader->IXmlReader_iface;
3338 TRACE("returning iface %p\n", *obj);
3340 return S_OK;
3343 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3344 IMalloc *imalloc,
3345 LPCWSTR encoding,
3346 BOOL hint,
3347 LPCWSTR base_uri,
3348 IXmlReaderInput **ppInput)
3350 xmlreaderinput *readerinput;
3351 HRESULT hr;
3353 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3354 hint, wine_dbgstr_w(base_uri), ppInput);
3356 if (!stream || !ppInput) return E_INVALIDARG;
3358 if (imalloc)
3359 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3360 else
3361 readerinput = heap_alloc(sizeof(*readerinput));
3362 if(!readerinput) return E_OUTOFMEMORY;
3364 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3365 readerinput->ref = 1;
3366 readerinput->imalloc = imalloc;
3367 readerinput->stream = NULL;
3368 if (imalloc) IMalloc_AddRef(imalloc);
3369 readerinput->encoding = parse_encoding_name(encoding, -1);
3370 readerinput->hint = hint;
3371 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3372 readerinput->pending = 0;
3374 hr = alloc_input_buffer(readerinput);
3375 if (hr != S_OK)
3377 readerinput_free(readerinput, readerinput->baseuri);
3378 readerinput_free(readerinput, readerinput);
3379 if (imalloc) IMalloc_Release(imalloc);
3380 return hr;
3382 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3384 *ppInput = &readerinput->IXmlReaderInput_iface;
3386 TRACE("returning iface %p\n", *ppInput);
3388 return S_OK;