xmllite/reader: Reset node type to XmlNodeType_None on EOF.
[wine.git] / dlls / xmllite / reader.c
blobae5d9eaef8504be9c61258b9e81b7209672381f9
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parser state, stored in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records the construct that was being parsed when an input problem
   interrupted the reader; parsing is resumed from this information. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into xmlreader.resume[]: saved input offsets used to resume
   parsing from a particular position. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
/* Indices into xmlreader.strvalues[], one slot per string the reader
   keeps for the current node. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static BOOL is_namestartchar(WCHAR ch);
98 static const char *debugstr_nodetype(XmlNodeType nodetype)
100 static const char * const type_names[] =
102 "None",
103 "Element",
104 "Attribute",
105 "Text",
106 "CDATA",
109 "ProcessingInstruction",
110 "Comment",
112 "DocumentType",
115 "Whitespace",
117 "EndElement",
119 "XmlDeclaration"
122 if (nodetype > _XmlNodeType_Last)
123 return wine_dbg_sprintf("unknown type=%d", nodetype);
125 return type_names[nodetype];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop)
130 static const char * const prop_names[] =
132 "MultiLanguage",
133 "ConformanceLevel",
134 "RandomAccess",
135 "XmlResolver",
136 "DtdProcessing",
137 "ReadState",
138 "MaxElementDepth",
139 "MaxEntityExpansion"
142 if (prop > _XmlReaderProperty_Last)
143 return wine_dbg_sprintf("unknown property=%d", prop);
145 return prop_names[prop];
148 struct xml_encoding_data
150 const WCHAR *name;
151 xml_encoding enc;
152 UINT cp;
155 static const struct xml_encoding_data xml_encoding_map[] = {
156 { utf16W, XmlEncoding_UTF16, ~0 },
157 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
160 const WCHAR *get_encoding_name(xml_encoding encoding)
162 return xml_encoding_map[encoding].name;
165 xml_encoding get_encoding_from_codepage(UINT codepage)
167 int i;
168 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
170 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
172 return XmlEncoding_Unknown;
175 typedef struct
177 char *data;
178 UINT cur;
179 unsigned int allocated;
180 unsigned int written;
181 } encoded_buffer;
183 typedef struct input_buffer input_buffer;
185 typedef struct
187 IXmlReaderInput IXmlReaderInput_iface;
188 LONG ref;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
190 IUnknown *input;
191 IMalloc *imalloc;
192 xml_encoding encoding;
193 BOOL hint;
194 WCHAR *baseuri;
195 /* stream reference set after SetInput() call from reader,
196 stored as sequential stream, cause currently
197 optimizations possible with IStream aren't implemented */
198 ISequentialStream *stream;
199 input_buffer *buffer;
200 unsigned int pending : 1;
201 } xmlreaderinput;
203 static const struct IUnknownVtbl xmlreaderinputvtbl;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
213 typedef struct
215 WCHAR *str; /* allocated null-terminated string */
216 UINT len; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start; /* input position where value starts */
218 } strval;
220 static WCHAR emptyW[] = {0};
221 static WCHAR xmlW[] = {'x','m','l',0};
222 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
223 static const strval strval_empty = { emptyW };
224 static const strval strval_xml = { xmlW, 3 };
225 static const strval strval_xmlns = { xmlnsW, 5 };
227 struct attribute
229 struct list entry;
230 strval prefix;
231 strval localname;
232 strval value;
235 struct element
237 struct list entry;
238 strval prefix;
239 strval localname;
240 strval qname;
243 struct ns
245 struct list entry;
246 strval prefix;
247 strval uri;
248 struct element *element;
251 typedef struct
253 IXmlReader IXmlReader_iface;
254 LONG ref;
255 xmlreaderinput *input;
256 IMalloc *imalloc;
257 XmlReadState state;
258 XmlReaderInternalState instate;
259 XmlReaderResumeState resumestate;
260 XmlNodeType nodetype;
261 DtdProcessing dtdmode;
262 IXmlResolver *resolver;
263 IUnknown *mlang;
264 UINT line, pos; /* reader position in XML stream */
265 struct list attrs; /* attributes list for current node */
266 struct attribute *attr; /* current attribute */
267 UINT attr_count;
268 struct list nsdef;
269 struct list ns;
270 struct list elements;
271 strval strvalues[StringValue_Last];
272 UINT depth;
273 UINT max_depth;
274 BOOL is_empty_element;
275 struct element empty_element;
276 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
277 } xmlreader;
279 struct input_buffer
281 encoded_buffer utf16;
282 encoded_buffer encoded;
283 UINT code_page;
284 xmlreaderinput *input;
287 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
289 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
292 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
294 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
297 /* reader memory allocation functions */
298 static inline void *reader_alloc(xmlreader *reader, size_t len)
300 return m_alloc(reader->imalloc, len);
303 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
305 void *ret = reader_alloc(reader, len);
306 if (ret)
307 memset(ret, 0, len);
308 return ret;
311 static inline void reader_free(xmlreader *reader, void *mem)
313 m_free(reader->imalloc, mem);
316 /* Just return pointer from offset, no attempt to read more. */
317 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
319 encoded_buffer *buffer = &reader->input->buffer->utf16;
320 return (WCHAR*)buffer->data + offset;
323 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
325 return v->str ? v->str : reader_get_ptr2(reader, v->start);
328 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
330 *dest = *src;
332 if (src->str != strval_empty.str)
334 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
335 if (!dest->str) return E_OUTOFMEMORY;
336 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
337 dest->str[dest->len] = 0;
338 dest->start = 0;
341 return S_OK;
344 /* reader input memory allocation functions */
345 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
347 return m_alloc(input->imalloc, len);
350 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
352 return m_realloc(input->imalloc, mem, len);
355 static inline void readerinput_free(xmlreaderinput *input, void *mem)
357 m_free(input->imalloc, mem);
360 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
362 LPWSTR ret = NULL;
364 if(str) {
365 DWORD size;
367 size = (strlenW(str)+1)*sizeof(WCHAR);
368 ret = readerinput_alloc(input, size);
369 if (ret) memcpy(ret, str, size);
372 return ret;
375 static void reader_clear_attrs(xmlreader *reader)
377 struct attribute *attr, *attr2;
378 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
380 reader_free(reader, attr);
382 list_init(&reader->attrs);
383 reader->attr_count = 0;
384 reader->attr = NULL;
387 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
388 while we are on a node with attributes */
389 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
391 struct attribute *attr;
393 attr = reader_alloc(reader, sizeof(*attr));
394 if (!attr) return E_OUTOFMEMORY;
396 if (prefix)
397 attr->prefix = *prefix;
398 else
399 memset(&attr->prefix, 0, sizeof(attr->prefix));
400 attr->localname = *localname;
401 attr->value = *value;
402 list_add_tail(&reader->attrs, &attr->entry);
403 reader->attr_count++;
405 return S_OK;
408 /* This one frees stored string value if needed */
409 static void reader_free_strvalued(xmlreader *reader, strval *v)
411 if (v->str != strval_empty.str)
413 reader_free(reader, v->str);
414 *v = strval_empty;
418 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
420 v->start = start;
421 v->len = len;
422 v->str = NULL;
425 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
427 return debugstr_wn(reader_get_strptr(reader, v), v->len);
430 /* used to initialize from constant string */
431 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
433 v->start = 0;
434 v->len = len;
435 v->str = str;
438 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
440 reader_free_strvalued(reader, &reader->strvalues[type]);
443 static void reader_free_strvalues(xmlreader *reader)
445 int type;
446 for (type = 0; type < StringValue_Last; type++)
447 reader_free_strvalue(reader, type);
450 /* This helper should only be used to test if strings are the same,
451 it doesn't try to sort. */
452 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
454 if (str1->len != str2->len) return 0;
455 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
458 static void reader_clear_elements(xmlreader *reader)
460 struct element *elem, *elem2;
461 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
463 reader_free_strvalued(reader, &elem->prefix);
464 reader_free_strvalued(reader, &elem->localname);
465 reader_free_strvalued(reader, &elem->qname);
466 reader_free(reader, elem);
468 list_init(&reader->elements);
469 reader->is_empty_element = FALSE;
472 static HRESULT reader_inc_depth(xmlreader *reader)
474 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
475 return S_OK;
478 static void reader_dec_depth(xmlreader *reader)
480 if (reader->depth > 1) reader->depth--;
483 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
485 struct ns *ns;
486 HRESULT hr;
488 ns = reader_alloc(reader, sizeof(*ns));
489 if (!ns) return E_OUTOFMEMORY;
491 if (def)
492 memset(&ns->prefix, 0, sizeof(ns->prefix));
493 else {
494 hr = reader_strvaldup(reader, prefix, &ns->prefix);
495 if (FAILED(hr)) {
496 reader_free(reader, ns);
497 return hr;
501 hr = reader_strvaldup(reader, uri, &ns->uri);
502 if (FAILED(hr)) {
503 reader_free_strvalued(reader, &ns->prefix);
504 reader_free(reader, ns);
505 return hr;
508 ns->element = NULL;
509 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
510 return hr;
513 static void reader_free_element(xmlreader *reader, struct element *element)
515 reader_free_strvalued(reader, &element->prefix);
516 reader_free_strvalued(reader, &element->localname);
517 reader_free_strvalued(reader, &element->qname);
518 reader_free(reader, element);
521 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
523 struct ns *ns;
525 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
526 if (ns->element)
527 break;
528 ns->element = element;
531 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
532 if (ns->element)
533 break;
534 ns->element = element;
538 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
539 strval *qname)
541 struct element *element;
542 HRESULT hr;
544 if (!list_empty(&reader->elements))
546 hr = reader_inc_depth(reader);
547 if (FAILED(hr))
548 return hr;
551 element = reader_alloc_zero(reader, sizeof(*element));
552 if (!element) {
553 hr = E_OUTOFMEMORY;
554 goto failed;
557 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
558 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
559 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
561 reader_free_element(reader, element);
562 goto failed;
565 list_add_head(&reader->elements, &element->entry);
566 reader_mark_ns_nodes(reader, element);
567 reader->is_empty_element = FALSE;
569 failed:
570 reader_dec_depth(reader);
571 return hr;
574 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
576 struct ns *ns, *ns2;
578 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
579 if (ns->element != element)
580 break;
582 list_remove(&ns->entry);
583 reader_free_strvalued(reader, &ns->prefix);
584 reader_free_strvalued(reader, &ns->uri);
585 reader_free(reader, ns);
588 if (!list_empty(&reader->nsdef)) {
589 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
590 if (ns->element == element) {
591 list_remove(&ns->entry);
592 reader_free_strvalued(reader, &ns->prefix);
593 reader_free_strvalued(reader, &ns->uri);
594 reader_free(reader, ns);
599 static void reader_pop_element(xmlreader *reader)
601 struct element *element;
603 if (list_empty(&reader->elements))
604 return;
606 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
607 list_remove(&element->entry);
609 reader_pop_ns_nodes(reader, element);
610 reader_free_element(reader, element);
611 reader_dec_depth(reader);
613 /* It was a root element, the rest is expected as Misc */
614 if (list_empty(&reader->elements))
615 reader->instate = XmlReadInState_MiscEnd;
618 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
619 means node value is to be determined. */
620 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
622 strval *v = &reader->strvalues[type];
624 reader_free_strvalue(reader, type);
625 if (!value)
627 v->str = NULL;
628 v->start = 0;
629 v->len = 0;
630 return;
633 if (value->str == strval_empty.str)
634 *v = *value;
635 else
637 if (type == StringValue_Value)
639 /* defer allocation for value string */
640 v->str = NULL;
641 v->start = value->start;
642 v->len = value->len;
644 else
646 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
647 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
648 v->str[value->len] = 0;
649 v->len = value->len;
654 static inline int is_reader_pending(xmlreader *reader)
656 return reader->input->pending;
659 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
661 const int initial_len = 0x2000;
662 buffer->data = readerinput_alloc(input, initial_len);
663 if (!buffer->data) return E_OUTOFMEMORY;
665 memset(buffer->data, 0, 4);
666 buffer->cur = 0;
667 buffer->allocated = initial_len;
668 buffer->written = 0;
670 return S_OK;
673 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
675 readerinput_free(input, buffer->data);
678 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
680 if (encoding == XmlEncoding_Unknown)
682 FIXME("unsupported encoding %d\n", encoding);
683 return E_NOTIMPL;
686 *cp = xml_encoding_map[encoding].cp;
688 return S_OK;
691 xml_encoding parse_encoding_name(const WCHAR *name, int len)
693 int min, max, n, c;
695 if (!name) return XmlEncoding_Unknown;
697 min = 0;
698 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
700 while (min <= max)
702 n = (min+max)/2;
704 if (len != -1)
705 c = strncmpiW(xml_encoding_map[n].name, name, len);
706 else
707 c = strcmpiW(xml_encoding_map[n].name, name);
708 if (!c)
709 return xml_encoding_map[n].enc;
711 if (c > 0)
712 max = n-1;
713 else
714 min = n+1;
717 return XmlEncoding_Unknown;
720 static HRESULT alloc_input_buffer(xmlreaderinput *input)
722 input_buffer *buffer;
723 HRESULT hr;
725 input->buffer = NULL;
727 buffer = readerinput_alloc(input, sizeof(*buffer));
728 if (!buffer) return E_OUTOFMEMORY;
730 buffer->input = input;
731 buffer->code_page = ~0; /* code page is unknown at this point */
732 hr = init_encoded_buffer(input, &buffer->utf16);
733 if (hr != S_OK) {
734 readerinput_free(input, buffer);
735 return hr;
738 hr = init_encoded_buffer(input, &buffer->encoded);
739 if (hr != S_OK) {
740 free_encoded_buffer(input, &buffer->utf16);
741 readerinput_free(input, buffer);
742 return hr;
745 input->buffer = buffer;
746 return S_OK;
749 static void free_input_buffer(input_buffer *buffer)
751 free_encoded_buffer(buffer->input, &buffer->encoded);
752 free_encoded_buffer(buffer->input, &buffer->utf16);
753 readerinput_free(buffer->input, buffer);
756 static void readerinput_release_stream(xmlreaderinput *readerinput)
758 if (readerinput->stream) {
759 ISequentialStream_Release(readerinput->stream);
760 readerinput->stream = NULL;
764 /* Queries already stored interface for IStream/ISequentialStream.
765 Interface supplied on creation will be overwritten */
766 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
768 HRESULT hr;
770 readerinput_release_stream(readerinput);
771 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
772 if (hr != S_OK)
773 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
775 return hr;
778 /* reads a chunk to raw buffer */
779 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
781 encoded_buffer *buffer = &readerinput->buffer->encoded;
782 /* to make sure aligned length won't exceed allocated length */
783 ULONG len = buffer->allocated - buffer->written - 4;
784 ULONG read;
785 HRESULT hr;
787 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
788 variable width encodings like UTF-8 */
789 len = (len + 3) & ~3;
790 /* try to use allocated space or grow */
791 if (buffer->allocated - buffer->written < len)
793 buffer->allocated *= 2;
794 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
795 len = buffer->allocated - buffer->written;
798 read = 0;
799 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
800 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
801 readerinput->pending = hr == E_PENDING;
802 if (FAILED(hr)) return hr;
803 buffer->written += read;
805 return hr;
808 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
809 static void readerinput_grow(xmlreaderinput *readerinput, int length)
811 encoded_buffer *buffer = &readerinput->buffer->utf16;
813 length *= sizeof(WCHAR);
814 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
815 if (buffer->allocated < buffer->written + length + 4)
817 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
818 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
819 buffer->allocated = grown_size;
823 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
825 static const char startA[] = {'<','?'};
826 static const char commentA[] = {'<','!'};
827 encoded_buffer *buffer = &readerinput->buffer->encoded;
828 unsigned char *ptr = (unsigned char*)buffer->data;
830 return !memcmp(buffer->data, startA, sizeof(startA)) ||
831 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
832 /* test start byte */
833 (ptr[0] == '<' &&
835 (ptr[1] && (ptr[1] <= 0x7f)) ||
836 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
837 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
838 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
842 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
844 encoded_buffer *buffer = &readerinput->buffer->encoded;
845 static const char utf8bom[] = {0xef,0xbb,0xbf};
846 static const char utf16lebom[] = {0xff,0xfe};
847 WCHAR *ptrW;
849 *enc = XmlEncoding_Unknown;
851 if (buffer->written <= 3)
853 HRESULT hr = readerinput_growraw(readerinput);
854 if (FAILED(hr)) return hr;
855 if (buffer->written <= 3) return MX_E_INPUTEND;
858 ptrW = (WCHAR *)buffer->data;
859 /* try start symbols if we have enough data to do that, input buffer should contain
860 first chunk already */
861 if (readerinput_is_utf8(readerinput))
862 *enc = XmlEncoding_UTF8;
863 else if (*ptrW == '<')
865 ptrW++;
866 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
867 *enc = XmlEncoding_UTF16;
869 /* try with BOM now */
870 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
872 buffer->cur += sizeof(utf8bom);
873 *enc = XmlEncoding_UTF8;
875 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
877 buffer->cur += sizeof(utf16lebom);
878 *enc = XmlEncoding_UTF16;
881 return S_OK;
884 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
886 encoded_buffer *buffer = &readerinput->buffer->encoded;
887 int len = buffer->written;
889 /* complete single byte char */
890 if (!(buffer->data[len-1] & 0x80)) return len;
892 /* find start byte of multibyte char */
893 while (--len && !(buffer->data[len] & 0xc0))
896 return len;
899 /* Returns byte length of complete char sequence for buffer code page,
900 it's relative to current buffer position which is currently used for BOM handling
901 only. */
902 static int readerinput_get_convlen(xmlreaderinput *readerinput)
904 encoded_buffer *buffer = &readerinput->buffer->encoded;
905 int len;
907 if (readerinput->buffer->code_page == CP_UTF8)
908 len = readerinput_get_utf8_convlen(readerinput);
909 else
910 len = buffer->written;
912 TRACE("%d\n", len - buffer->cur);
913 return len - buffer->cur;
916 /* It's possible that raw buffer has some leftovers from last conversion - some char
917 sequence that doesn't represent a full code point. Length argument should be calculated with
918 readerinput_get_convlen(), if it's -1 it will be calculated here. */
919 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
921 encoded_buffer *buffer = &readerinput->buffer->encoded;
923 if (len == -1)
924 len = readerinput_get_convlen(readerinput);
926 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
927 /* everything below cur is lost too */
928 buffer->written -= len + buffer->cur;
929 /* after this point we don't need cur offset really,
930 it's used only to mark where actual data begins when first chunk is read */
931 buffer->cur = 0;
934 /* note that raw buffer content is kept */
935 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
937 encoded_buffer *src = &readerinput->buffer->encoded;
938 encoded_buffer *dest = &readerinput->buffer->utf16;
939 int len, dest_len;
940 HRESULT hr;
941 WCHAR *ptr;
942 UINT cp;
944 hr = get_code_page(enc, &cp);
945 if (FAILED(hr)) return;
947 readerinput->buffer->code_page = cp;
948 len = readerinput_get_convlen(readerinput);
950 TRACE("switching to cp %d\n", cp);
952 /* just copy in this case */
953 if (enc == XmlEncoding_UTF16)
955 readerinput_grow(readerinput, len);
956 memcpy(dest->data, src->data + src->cur, len);
957 dest->written += len*sizeof(WCHAR);
958 return;
961 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
962 readerinput_grow(readerinput, dest_len);
963 ptr = (WCHAR*)dest->data;
964 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
965 ptr[dest_len] = 0;
966 dest->written += dest_len*sizeof(WCHAR);
969 /* shrinks parsed data a buffer begins with */
970 static void reader_shrink(xmlreader *reader)
972 encoded_buffer *buffer = &reader->input->buffer->utf16;
974 /* avoid to move too often using threshold shrink length */
975 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
977 buffer->written -= buffer->cur*sizeof(WCHAR);
978 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
979 buffer->cur = 0;
980 *(WCHAR*)&buffer->data[buffer->written] = 0;
984 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
985 It won't attempt to shrink but will grow destination buffer if needed */
986 static HRESULT reader_more(xmlreader *reader)
988 xmlreaderinput *readerinput = reader->input;
989 encoded_buffer *src = &readerinput->buffer->encoded;
990 encoded_buffer *dest = &readerinput->buffer->utf16;
991 UINT cp = readerinput->buffer->code_page;
992 int len, dest_len;
993 HRESULT hr;
994 WCHAR *ptr;
996 /* get some raw data from stream first */
997 hr = readerinput_growraw(readerinput);
998 len = readerinput_get_convlen(readerinput);
1000 /* just copy for UTF-16 case */
1001 if (cp == ~0)
1003 readerinput_grow(readerinput, len);
1004 memcpy(dest->data + dest->written, src->data + src->cur, len);
1005 dest->written += len*sizeof(WCHAR);
1006 return hr;
1009 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1010 readerinput_grow(readerinput, dest_len);
1011 ptr = (WCHAR*)(dest->data + dest->written);
1012 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1013 ptr[dest_len] = 0;
1014 dest->written += dest_len*sizeof(WCHAR);
1015 /* get rid of processed data */
1016 readerinput_shrinkraw(readerinput, len);
1018 return hr;
1021 static inline UINT reader_get_cur(xmlreader *reader)
1023 return reader->input->buffer->utf16.cur;
1026 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1028 encoded_buffer *buffer = &reader->input->buffer->utf16;
1029 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1030 if (!*ptr) reader_more(reader);
1031 return (WCHAR*)buffer->data + buffer->cur;
1034 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1036 int i=0;
1037 const WCHAR *ptr = reader_get_ptr(reader);
1038 while (str[i])
1040 if (!ptr[i])
1042 reader_more(reader);
1043 ptr = reader_get_ptr(reader);
1045 if (str[i] != ptr[i])
1046 return ptr[i] - str[i];
1047 i++;
1049 return 0;
1052 /* moves cursor n WCHARs forward */
1053 static void reader_skipn(xmlreader *reader, int n)
1055 encoded_buffer *buffer = &reader->input->buffer->utf16;
1056 const WCHAR *ptr = reader_get_ptr(reader);
1058 while (*ptr++ && n--)
1060 buffer->cur++;
1061 reader->pos++;
1065 static inline BOOL is_wchar_space(WCHAR ch)
1067 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1070 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1071 static int reader_skipspaces(xmlreader *reader)
1073 encoded_buffer *buffer = &reader->input->buffer->utf16;
1074 const WCHAR *ptr = reader_get_ptr(reader);
1075 UINT start = reader_get_cur(reader);
1077 while (is_wchar_space(*ptr))
1079 if (*ptr == '\r')
1080 reader->pos = 0;
1081 else if (*ptr == '\n')
1083 reader->line++;
1084 reader->pos = 0;
1086 else
1087 reader->pos++;
1089 buffer->cur++;
1090 ptr = reader_get_ptr(reader);
1093 return reader_get_cur(reader) - start;
1096 /* [26] VersionNum ::= '1.' [0-9]+ */
1097 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1099 static const WCHAR onedotW[] = {'1','.',0};
1100 WCHAR *ptr, *ptr2;
1101 UINT start;
1103 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1105 start = reader_get_cur(reader);
1106 /* skip "1." */
1107 reader_skipn(reader, 2);
1109 ptr2 = ptr = reader_get_ptr(reader);
1110 while (*ptr >= '0' && *ptr <= '9')
1112 reader_skipn(reader, 1);
1113 ptr = reader_get_ptr(reader);
1116 if (ptr2 == ptr) return WC_E_DIGIT;
1117 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1118 TRACE("version=%s\n", debug_strval(reader, val));
1119 return S_OK;
1122 /* [25] Eq ::= S? '=' S? */
1123 static HRESULT reader_parse_eq(xmlreader *reader)
1125 static const WCHAR eqW[] = {'=',0};
1126 reader_skipspaces(reader);
1127 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1128 /* skip '=' */
1129 reader_skipn(reader, 1);
1130 reader_skipspaces(reader);
1131 return S_OK;
1134 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1135 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1137 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1138 strval val, name;
1139 HRESULT hr;
1141 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1143 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1144 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1145 /* skip 'version' */
1146 reader_skipn(reader, 7);
1148 hr = reader_parse_eq(reader);
1149 if (FAILED(hr)) return hr;
1151 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1152 return WC_E_QUOTE;
1153 /* skip "'"|'"' */
1154 reader_skipn(reader, 1);
1156 hr = reader_parse_versionnum(reader, &val);
1157 if (FAILED(hr)) return hr;
1159 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1160 return WC_E_QUOTE;
1162 /* skip "'"|'"' */
1163 reader_skipn(reader, 1);
1165 return reader_add_attr(reader, NULL, &name, &val);
1168 /* ([A-Za-z0-9._] | '-') */
1169 static inline BOOL is_wchar_encname(WCHAR ch)
1171 return ((ch >= 'A' && ch <= 'Z') ||
1172 (ch >= 'a' && ch <= 'z') ||
1173 (ch >= '0' && ch <= '9') ||
1174 (ch == '.') || (ch == '_') ||
1175 (ch == '-'));
1178 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1179 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1181 WCHAR *start = reader_get_ptr(reader), *ptr;
1182 xml_encoding enc;
1183 int len;
1185 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1186 return WC_E_ENCNAME;
1188 val->start = reader_get_cur(reader);
1190 ptr = start;
1191 while (is_wchar_encname(*++ptr))
1194 len = ptr - start;
1195 enc = parse_encoding_name(start, len);
1196 TRACE("encoding name %s\n", debugstr_wn(start, len));
1197 val->str = start;
1198 val->len = len;
1200 if (enc == XmlEncoding_Unknown)
1201 return WC_E_ENCNAME;
1203 /* skip encoding name */
1204 reader_skipn(reader, len);
1205 return S_OK;
1208 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1209 static HRESULT reader_parse_encdecl(xmlreader *reader)
1211 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1212 strval name, val;
1213 HRESULT hr;
1215 if (!reader_skipspaces(reader)) return S_FALSE;
1217 if (reader_cmp(reader, encodingW)) return S_FALSE;
1218 name.str = reader_get_ptr(reader);
1219 name.start = reader_get_cur(reader);
1220 name.len = 8;
1221 /* skip 'encoding' */
1222 reader_skipn(reader, 8);
1224 hr = reader_parse_eq(reader);
1225 if (FAILED(hr)) return hr;
1227 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1228 return WC_E_QUOTE;
1229 /* skip "'"|'"' */
1230 reader_skipn(reader, 1);
1232 hr = reader_parse_encname(reader, &val);
1233 if (FAILED(hr)) return hr;
1235 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1236 return WC_E_QUOTE;
1238 /* skip "'"|'"' */
1239 reader_skipn(reader, 1);
1241 return reader_add_attr(reader, NULL, &name, &val);
1244 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1245 static HRESULT reader_parse_sddecl(xmlreader *reader)
1247 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1248 static const WCHAR yesW[] = {'y','e','s',0};
1249 static const WCHAR noW[] = {'n','o',0};
1250 strval name, val;
1251 UINT start;
1252 HRESULT hr;
1254 if (!reader_skipspaces(reader)) return S_FALSE;
1256 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1257 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1258 /* skip 'standalone' */
1259 reader_skipn(reader, 10);
1261 hr = reader_parse_eq(reader);
1262 if (FAILED(hr)) return hr;
1264 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1265 return WC_E_QUOTE;
1266 /* skip "'"|'"' */
1267 reader_skipn(reader, 1);
1269 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1270 return WC_E_XMLDECL;
1272 start = reader_get_cur(reader);
1273 /* skip 'yes'|'no' */
1274 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1275 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1276 TRACE("standalone=%s\n", debug_strval(reader, &val));
1278 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1279 return WC_E_QUOTE;
1280 /* skip "'"|'"' */
1281 reader_skipn(reader, 1);
1283 return reader_add_attr(reader, NULL, &name, &val);
1286 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1287 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1289 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1290 static const WCHAR declcloseW[] = {'?','>',0};
1291 HRESULT hr;
1293 /* check if we have "<?xml " */
1294 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1296 reader_skipn(reader, 5);
1297 hr = reader_parse_versioninfo(reader);
1298 if (FAILED(hr))
1299 return hr;
1301 hr = reader_parse_encdecl(reader);
1302 if (FAILED(hr))
1303 return hr;
1305 hr = reader_parse_sddecl(reader);
1306 if (FAILED(hr))
1307 return hr;
1309 reader_skipspaces(reader);
1310 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1311 reader_skipn(reader, 2);
1313 reader_inc_depth(reader);
1314 reader->nodetype = XmlNodeType_XmlDeclaration;
1315 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1316 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1317 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1319 return S_OK;
1322 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1323 static HRESULT reader_parse_comment(xmlreader *reader)
1325 WCHAR *ptr;
1326 UINT start;
1328 if (reader->resumestate == XmlReadResumeState_Comment)
1330 start = reader->resume[XmlReadResume_Body];
1331 ptr = reader_get_ptr(reader);
1333 else
1335 /* skip '<!--' */
1336 reader_skipn(reader, 4);
1337 reader_shrink(reader);
1338 ptr = reader_get_ptr(reader);
1339 start = reader_get_cur(reader);
1340 reader->nodetype = XmlNodeType_Comment;
1341 reader->resume[XmlReadResume_Body] = start;
1342 reader->resumestate = XmlReadResumeState_Comment;
1343 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1344 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1345 reader_set_strvalue(reader, StringValue_Value, NULL);
1348 /* will exit when there's no more data, it won't attempt to
1349 read more from stream */
1350 while (*ptr)
1352 if (ptr[0] == '-')
1354 if (ptr[1] == '-')
1356 if (ptr[2] == '>')
1358 strval value;
1360 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1361 TRACE("%s\n", debug_strval(reader, &value));
1363 /* skip rest of markup '->' */
1364 reader_skipn(reader, 3);
1366 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1367 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1368 reader_set_strvalue(reader, StringValue_Value, &value);
1369 reader->resume[XmlReadResume_Body] = 0;
1370 reader->resumestate = XmlReadResumeState_Initial;
1371 return S_OK;
1373 else
1374 return WC_E_COMMENT;
1378 reader_skipn(reader, 1);
1379 ptr++;
1382 return S_OK;
1385 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1386 static inline BOOL is_char(WCHAR ch)
1388 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1389 (ch >= 0x20 && ch <= 0xd7ff) ||
1390 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1391 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1392 (ch >= 0xe000 && ch <= 0xfffd);
1395 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1396 static inline BOOL is_pubchar(WCHAR ch)
1398 return (ch == ' ') ||
1399 (ch >= 'a' && ch <= 'z') ||
1400 (ch >= 'A' && ch <= 'Z') ||
1401 (ch >= '0' && ch <= '9') ||
1402 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1403 (ch == '=') || (ch == '?') ||
1404 (ch == '@') || (ch == '!') ||
1405 (ch >= '#' && ch <= '%') || /* #$% */
1406 (ch == '_') || (ch == '\r') || (ch == '\n');
1409 static inline BOOL is_namestartchar(WCHAR ch)
1411 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1412 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1413 (ch >= 0xc0 && ch <= 0xd6) ||
1414 (ch >= 0xd8 && ch <= 0xf6) ||
1415 (ch >= 0xf8 && ch <= 0x2ff) ||
1416 (ch >= 0x370 && ch <= 0x37d) ||
1417 (ch >= 0x37f && ch <= 0x1fff) ||
1418 (ch >= 0x200c && ch <= 0x200d) ||
1419 (ch >= 0x2070 && ch <= 0x218f) ||
1420 (ch >= 0x2c00 && ch <= 0x2fef) ||
1421 (ch >= 0x3001 && ch <= 0xd7ff) ||
1422 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1423 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1424 (ch >= 0xf900 && ch <= 0xfdcf) ||
1425 (ch >= 0xfdf0 && ch <= 0xfffd);
1428 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1429 static inline BOOL is_ncnamechar(WCHAR ch)
1431 return (ch >= 'A' && ch <= 'Z') ||
1432 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1433 (ch == '-') || (ch == '.') ||
1434 (ch >= '0' && ch <= '9') ||
1435 (ch == 0xb7) ||
1436 (ch >= 0xc0 && ch <= 0xd6) ||
1437 (ch >= 0xd8 && ch <= 0xf6) ||
1438 (ch >= 0xf8 && ch <= 0x2ff) ||
1439 (ch >= 0x300 && ch <= 0x36f) ||
1440 (ch >= 0x370 && ch <= 0x37d) ||
1441 (ch >= 0x37f && ch <= 0x1fff) ||
1442 (ch >= 0x200c && ch <= 0x200d) ||
1443 (ch >= 0x203f && ch <= 0x2040) ||
1444 (ch >= 0x2070 && ch <= 0x218f) ||
1445 (ch >= 0x2c00 && ch <= 0x2fef) ||
1446 (ch >= 0x3001 && ch <= 0xd7ff) ||
1447 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1448 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1449 (ch >= 0xf900 && ch <= 0xfdcf) ||
1450 (ch >= 0xfdf0 && ch <= 0xfffd);
1453 static inline BOOL is_namechar(WCHAR ch)
1455 return (ch == ':') || is_ncnamechar(ch);
1458 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1460 /* When we're on attribute always return attribute type, container node type is kept.
1461 Note that container is not necessarily an element, and attribute doesn't mean it's
1462 an attribute in XML spec terms. */
1463 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1466 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1467 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1468 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1469 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1470 [5] Name ::= NameStartChar (NameChar)* */
1471 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1473 WCHAR *ptr;
1474 UINT start;
1476 if (reader->resume[XmlReadResume_Name])
1478 start = reader->resume[XmlReadResume_Name];
1479 ptr = reader_get_ptr(reader);
1481 else
1483 ptr = reader_get_ptr(reader);
1484 start = reader_get_cur(reader);
1485 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1488 while (is_namechar(*ptr))
1490 reader_skipn(reader, 1);
1491 ptr = reader_get_ptr(reader);
1494 if (is_reader_pending(reader))
1496 reader->resume[XmlReadResume_Name] = start;
1497 return E_PENDING;
1499 else
1500 reader->resume[XmlReadResume_Name] = 0;
1502 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1503 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1505 return S_OK;
1508 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1509 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1511 static const WCHAR xmlW[] = {'x','m','l'};
1512 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1513 strval name;
1514 WCHAR *ptr;
1515 HRESULT hr;
1516 UINT i;
1518 hr = reader_parse_name(reader, &name);
1519 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1521 /* now that we got name check for illegal content */
1522 if (strval_eq(reader, &name, &xmlval))
1523 return WC_E_LEADINGXML;
1525 /* PITarget can't be a qualified name */
1526 ptr = reader_get_strptr(reader, &name);
1527 for (i = 0; i < name.len; i++)
1528 if (ptr[i] == ':')
1529 return i ? NC_E_NAMECOLON : WC_E_PI;
1531 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1532 *target = name;
1533 return S_OK;
1536 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1537 static HRESULT reader_parse_pi(xmlreader *reader)
1539 strval target;
1540 WCHAR *ptr;
1541 UINT start;
1542 HRESULT hr;
1544 switch (reader->resumestate)
1546 case XmlReadResumeState_Initial:
1547 /* skip '<?' */
1548 reader_skipn(reader, 2);
1549 reader_shrink(reader);
1550 reader->resumestate = XmlReadResumeState_PITarget;
1551 case XmlReadResumeState_PITarget:
1552 hr = reader_parse_pitarget(reader, &target);
1553 if (FAILED(hr)) return hr;
1554 reader_set_strvalue(reader, StringValue_LocalName, &target);
1555 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1556 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1557 reader->resumestate = XmlReadResumeState_PIBody;
1558 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1559 default:
1563 start = reader->resume[XmlReadResume_Body];
1564 ptr = reader_get_ptr(reader);
1565 while (*ptr)
1567 if (ptr[0] == '?')
1569 if (ptr[1] == '>')
1571 UINT cur = reader_get_cur(reader);
1572 strval value;
1574 /* strip all leading whitespace chars */
1575 while (start < cur)
1577 ptr = reader_get_ptr2(reader, start);
1578 if (!is_wchar_space(*ptr)) break;
1579 start++;
1582 reader_init_strvalue(start, cur-start, &value);
1584 /* skip '?>' */
1585 reader_skipn(reader, 2);
1586 TRACE("%s\n", debug_strval(reader, &value));
1587 reader->nodetype = XmlNodeType_ProcessingInstruction;
1588 reader->resumestate = XmlReadResumeState_Initial;
1589 reader->resume[XmlReadResume_Body] = 0;
1590 reader_set_strvalue(reader, StringValue_Value, &value);
1591 return S_OK;
1595 reader_skipn(reader, 1);
1596 ptr = reader_get_ptr(reader);
1599 return S_OK;
1602 /* This one is used to parse significant whitespace nodes, like in Misc production */
1603 static HRESULT reader_parse_whitespace(xmlreader *reader)
1605 switch (reader->resumestate)
1607 case XmlReadResumeState_Initial:
1608 reader_shrink(reader);
1609 reader->resumestate = XmlReadResumeState_Whitespace;
1610 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1611 reader->nodetype = XmlNodeType_Whitespace;
1612 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1613 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1614 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1615 /* fallthrough */
1616 case XmlReadResumeState_Whitespace:
1618 strval value;
1619 UINT start;
1621 reader_skipspaces(reader);
1622 if (is_reader_pending(reader)) return S_OK;
1624 start = reader->resume[XmlReadResume_Body];
1625 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1626 reader_set_strvalue(reader, StringValue_Value, &value);
1627 TRACE("%s\n", debug_strval(reader, &value));
1628 reader->resumestate = XmlReadResumeState_Initial;
1630 default:
1634 return S_OK;
1637 /* [27] Misc ::= Comment | PI | S */
1638 static HRESULT reader_parse_misc(xmlreader *reader)
1640 HRESULT hr = S_FALSE;
1642 if (reader->resumestate != XmlReadResumeState_Initial)
1644 hr = reader_more(reader);
1645 if (FAILED(hr)) return hr;
1647 /* finish current node */
1648 switch (reader->resumestate)
1650 case XmlReadResumeState_PITarget:
1651 case XmlReadResumeState_PIBody:
1652 return reader_parse_pi(reader);
1653 case XmlReadResumeState_Comment:
1654 return reader_parse_comment(reader);
1655 case XmlReadResumeState_Whitespace:
1656 return reader_parse_whitespace(reader);
1657 default:
1658 ERR("unknown resume state %d\n", reader->resumestate);
1662 while (1)
1664 const WCHAR *cur = reader_get_ptr(reader);
1666 if (is_wchar_space(*cur))
1667 hr = reader_parse_whitespace(reader);
1668 else if (!reader_cmp(reader, commentW))
1669 hr = reader_parse_comment(reader);
1670 else if (!reader_cmp(reader, piW))
1671 hr = reader_parse_pi(reader);
1672 else
1673 break;
1675 if (hr != S_FALSE) return hr;
1678 return hr;
1681 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1682 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1684 WCHAR *cur = reader_get_ptr(reader), quote;
1685 UINT start;
1687 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1689 quote = *cur;
1690 reader_skipn(reader, 1);
1692 cur = reader_get_ptr(reader);
1693 start = reader_get_cur(reader);
1694 while (is_char(*cur) && *cur != quote)
1696 reader_skipn(reader, 1);
1697 cur = reader_get_ptr(reader);
1699 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1700 if (*cur == quote) reader_skipn(reader, 1);
1702 TRACE("%s\n", debug_strval(reader, literal));
1703 return S_OK;
1706 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1707 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1708 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1710 WCHAR *cur = reader_get_ptr(reader), quote;
1711 UINT start;
1713 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1715 quote = *cur;
1716 reader_skipn(reader, 1);
1718 start = reader_get_cur(reader);
1719 cur = reader_get_ptr(reader);
1720 while (is_pubchar(*cur) && *cur != quote)
1722 reader_skipn(reader, 1);
1723 cur = reader_get_ptr(reader);
1725 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1726 if (*cur == quote) reader_skipn(reader, 1);
1728 TRACE("%s\n", debug_strval(reader, literal));
1729 return S_OK;
1732 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1733 static HRESULT reader_parse_externalid(xmlreader *reader)
1735 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1736 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1737 strval name, sys;
1738 HRESULT hr;
1739 int cnt;
1741 if (!reader_cmp(reader, publicW)) {
1742 strval pub;
1744 /* public id */
1745 reader_skipn(reader, 6);
1746 cnt = reader_skipspaces(reader);
1747 if (!cnt) return WC_E_WHITESPACE;
1749 hr = reader_parse_pub_literal(reader, &pub);
1750 if (FAILED(hr)) return hr;
1752 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1753 hr = reader_add_attr(reader, NULL, &name, &pub);
1754 if (FAILED(hr)) return hr;
1756 cnt = reader_skipspaces(reader);
1757 if (!cnt) return S_OK;
1759 /* optional system id */
1760 hr = reader_parse_sys_literal(reader, &sys);
1761 if (FAILED(hr)) return S_OK;
1763 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1764 hr = reader_add_attr(reader, NULL, &name, &sys);
1765 if (FAILED(hr)) return hr;
1767 return S_OK;
1768 } else if (!reader_cmp(reader, systemW)) {
1769 /* system id */
1770 reader_skipn(reader, 6);
1771 cnt = reader_skipspaces(reader);
1772 if (!cnt) return WC_E_WHITESPACE;
1774 hr = reader_parse_sys_literal(reader, &sys);
1775 if (FAILED(hr)) return hr;
1777 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1778 return reader_add_attr(reader, NULL, &name, &sys);
1781 return S_FALSE;
1784 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1785 static HRESULT reader_parse_dtd(xmlreader *reader)
1787 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1788 strval name;
1789 WCHAR *cur;
1790 HRESULT hr;
1792 /* check if we have "<!DOCTYPE" */
1793 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1794 reader_shrink(reader);
1796 /* DTD processing is not allowed by default */
1797 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1799 reader_skipn(reader, 9);
1800 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1802 /* name */
1803 hr = reader_parse_name(reader, &name);
1804 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1806 reader_skipspaces(reader);
1808 hr = reader_parse_externalid(reader);
1809 if (FAILED(hr)) return hr;
1811 reader_skipspaces(reader);
1813 cur = reader_get_ptr(reader);
1814 if (*cur != '>')
1816 FIXME("internal subset parsing not implemented\n");
1817 return E_NOTIMPL;
1820 /* skip '>' */
1821 reader_skipn(reader, 1);
1823 reader->nodetype = XmlNodeType_DocumentType;
1824 reader_set_strvalue(reader, StringValue_LocalName, &name);
1825 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1827 return S_OK;
1830 /* [11 NS] LocalPart ::= NCName */
1831 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1833 WCHAR *ptr;
1834 UINT start;
1836 if (reader->resume[XmlReadResume_Local])
1838 start = reader->resume[XmlReadResume_Local];
1839 ptr = reader_get_ptr(reader);
1841 else
1843 ptr = reader_get_ptr(reader);
1844 start = reader_get_cur(reader);
1847 while (is_ncnamechar(*ptr))
1849 reader_skipn(reader, 1);
1850 ptr = reader_get_ptr(reader);
1853 if (is_reader_pending(reader))
1855 reader->resume[XmlReadResume_Local] = start;
1856 return E_PENDING;
1858 else
1859 reader->resume[XmlReadResume_Local] = 0;
1861 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1863 return S_OK;
1866 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1867 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1868 [9 NS] UnprefixedName ::= LocalPart
1869 [10 NS] Prefix ::= NCName */
1870 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1872 WCHAR *ptr;
1873 UINT start;
1874 HRESULT hr;
1876 if (reader->resume[XmlReadResume_Name])
1878 start = reader->resume[XmlReadResume_Name];
1879 ptr = reader_get_ptr(reader);
1881 else
1883 ptr = reader_get_ptr(reader);
1884 start = reader_get_cur(reader);
1885 reader->resume[XmlReadResume_Name] = start;
1886 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1889 if (reader->resume[XmlReadResume_Local])
1891 hr = reader_parse_local(reader, local);
1892 if (FAILED(hr)) return hr;
1894 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1895 local->start - reader->resume[XmlReadResume_Name] - 1,
1896 prefix);
1898 else
1900 /* skip prefix part */
1901 while (is_ncnamechar(*ptr))
1903 reader_skipn(reader, 1);
1904 ptr = reader_get_ptr(reader);
1907 if (is_reader_pending(reader)) return E_PENDING;
1909 /* got a qualified name */
1910 if (*ptr == ':')
1912 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1914 /* skip ':' */
1915 reader_skipn(reader, 1);
1916 hr = reader_parse_local(reader, local);
1917 if (FAILED(hr)) return hr;
1919 else
1921 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1922 reader_init_strvalue(0, 0, prefix);
1926 if (prefix->len)
1927 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1928 else
1929 TRACE("ncname %s\n", debug_strval(reader, local));
1931 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1932 /* count ':' too */
1933 (prefix->len ? prefix->len + 1 : 0) + local->len,
1934 qname);
1936 reader->resume[XmlReadResume_Name] = 0;
1937 reader->resume[XmlReadResume_Local] = 0;
1939 return S_OK;
1942 /* Applies normalization rules to a single char, used for attribute values.
1944 Rules include 2 steps:
1946 1) replacing \r\n with a single \n;
1947 2) replacing all whitespace chars with ' '.
1950 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1952 encoded_buffer *buffer = &reader->input->buffer->utf16;
1954 if (!is_wchar_space(*ptr)) return;
1956 if (*ptr == '\r' && *(ptr+1) == '\n')
1958 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1959 memmove(ptr+1, ptr+2, len);
1961 *ptr = ' ';
1964 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1966 static const WCHAR entltW[] = {'l','t'};
1967 static const WCHAR entgtW[] = {'g','t'};
1968 static const WCHAR entampW[] = {'a','m','p'};
1969 static const WCHAR entaposW[] = {'a','p','o','s'};
1970 static const WCHAR entquotW[] = {'q','u','o','t'};
1971 static const strval lt = { (WCHAR*)entltW, 2 };
1972 static const strval gt = { (WCHAR*)entgtW, 2 };
1973 static const strval amp = { (WCHAR*)entampW, 3 };
1974 static const strval apos = { (WCHAR*)entaposW, 4 };
1975 static const strval quot = { (WCHAR*)entquotW, 4 };
1976 WCHAR *str = reader_get_strptr(reader, name);
1978 switch (*str)
1980 case 'l':
1981 if (strval_eq(reader, name, &lt)) return '<';
1982 break;
1983 case 'g':
1984 if (strval_eq(reader, name, &gt)) return '>';
1985 break;
1986 case 'a':
1987 if (strval_eq(reader, name, &amp))
1988 return '&';
1989 else if (strval_eq(reader, name, &apos))
1990 return '\'';
1991 break;
1992 case 'q':
1993 if (strval_eq(reader, name, &quot)) return '\"';
1994 break;
1995 default:
1999 return 0;
2002 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2003 [67] Reference ::= EntityRef | CharRef
2004 [68] EntityRef ::= '&' Name ';' */
2005 static HRESULT reader_parse_reference(xmlreader *reader)
2007 encoded_buffer *buffer = &reader->input->buffer->utf16;
2008 WCHAR *start = reader_get_ptr(reader), *ptr;
2009 UINT cur = reader_get_cur(reader);
2010 WCHAR ch = 0;
2011 int len;
2013 /* skip '&' */
2014 reader_skipn(reader, 1);
2015 ptr = reader_get_ptr(reader);
2017 if (*ptr == '#')
2019 reader_skipn(reader, 1);
2020 ptr = reader_get_ptr(reader);
2022 /* hex char or decimal */
2023 if (*ptr == 'x')
2025 reader_skipn(reader, 1);
2026 ptr = reader_get_ptr(reader);
2028 while (*ptr != ';')
2030 if ((*ptr >= '0' && *ptr <= '9'))
2031 ch = ch*16 + *ptr - '0';
2032 else if ((*ptr >= 'a' && *ptr <= 'f'))
2033 ch = ch*16 + *ptr - 'a' + 10;
2034 else if ((*ptr >= 'A' && *ptr <= 'F'))
2035 ch = ch*16 + *ptr - 'A' + 10;
2036 else
2037 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2038 reader_skipn(reader, 1);
2039 ptr = reader_get_ptr(reader);
2042 else
2044 while (*ptr != ';')
2046 if ((*ptr >= '0' && *ptr <= '9'))
2048 ch = ch*10 + *ptr - '0';
2049 reader_skipn(reader, 1);
2050 ptr = reader_get_ptr(reader);
2052 else
2053 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2057 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2059 /* normalize */
2060 if (is_wchar_space(ch)) ch = ' ';
2062 ptr = reader_get_ptr(reader);
2063 start = reader_get_ptr2(reader, cur);
2064 len = buffer->written - ((char *)ptr - buffer->data);
2065 memmove(start + 1, ptr + 1, len);
2067 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2068 buffer->cur = cur + 1;
2070 *start = ch;
2072 else
2074 strval name;
2075 HRESULT hr;
2077 hr = reader_parse_name(reader, &name);
2078 if (FAILED(hr)) return hr;
2080 ptr = reader_get_ptr(reader);
2081 if (*ptr != ';') return WC_E_SEMICOLON;
2083 /* predefined entities resolve to a single character */
2084 ch = get_predefined_entity(reader, &name);
2085 if (ch)
2087 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2088 memmove(start+1, ptr+1, len);
2089 buffer->cur = cur + 1;
2091 *start = ch;
2093 else
2095 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2096 return WC_E_UNDECLAREDENTITY;
2101 return S_OK;
2104 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2105 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2107 WCHAR *ptr, quote;
2108 UINT start;
2110 ptr = reader_get_ptr(reader);
2112 /* skip opening quote */
2113 quote = *ptr;
2114 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2115 reader_skipn(reader, 1);
2117 ptr = reader_get_ptr(reader);
2118 start = reader_get_cur(reader);
2119 while (*ptr)
2121 if (*ptr == '<') return WC_E_LESSTHAN;
2123 if (*ptr == quote)
2125 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2126 /* skip closing quote */
2127 reader_skipn(reader, 1);
2128 return S_OK;
2131 if (*ptr == '&')
2133 HRESULT hr = reader_parse_reference(reader);
2134 if (FAILED(hr)) return hr;
2136 else
2138 reader_normalize_space(reader, ptr);
2139 reader_skipn(reader, 1);
2141 ptr = reader_get_ptr(reader);
2144 return WC_E_QUOTE;
2147 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2148 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2149 [3 NS] DefaultAttName ::= 'xmlns'
2150 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2151 static HRESULT reader_parse_attribute(xmlreader *reader)
2153 strval prefix, local, qname, value;
2154 BOOL ns = FALSE, nsdef = FALSE;
2155 HRESULT hr;
2157 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2158 if (FAILED(hr)) return hr;
2160 if (strval_eq(reader, &prefix, &strval_xmlns))
2161 ns = TRUE;
2163 if (strval_eq(reader, &qname, &strval_xmlns))
2164 ns = nsdef = TRUE;
2166 hr = reader_parse_eq(reader);
2167 if (FAILED(hr)) return hr;
2169 hr = reader_parse_attvalue(reader, &value);
2170 if (FAILED(hr)) return hr;
2172 if (ns)
2173 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2175 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2176 return reader_add_attr(reader, &prefix, &local, &value);
2179 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2180 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2181 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2183 HRESULT hr;
2185 hr = reader_parse_qname(reader, prefix, local, qname);
2186 if (FAILED(hr)) return hr;
2188 while (1)
2190 static const WCHAR endW[] = {'/','>',0};
2192 reader_skipspaces(reader);
2194 /* empty element */
2195 if ((*empty = !reader_cmp(reader, endW)))
2197 /* skip '/>' */
2198 reader_skipn(reader, 2);
2199 reader->is_empty_element = TRUE;
2200 reader->empty_element.prefix = *prefix;
2201 reader->empty_element.localname = *local;
2202 reader->empty_element.qname = *qname;
2203 reader_mark_ns_nodes(reader, &reader->empty_element);
2204 return S_OK;
2207 /* got a start tag */
2208 if (!reader_cmp(reader, gtW))
2210 /* skip '>' */
2211 reader_skipn(reader, 1);
2212 return reader_push_element(reader, prefix, local, qname);
2215 hr = reader_parse_attribute(reader);
2216 if (FAILED(hr)) return hr;
2219 return S_OK;
2222 /* [39] element ::= EmptyElemTag | STag content ETag */
2223 static HRESULT reader_parse_element(xmlreader *reader)
2225 HRESULT hr;
2227 switch (reader->resumestate)
2229 case XmlReadResumeState_Initial:
2230 /* check if we are really on element */
2231 if (reader_cmp(reader, ltW)) return S_FALSE;
2233 /* skip '<' */
2234 reader_skipn(reader, 1);
2236 reader_shrink(reader);
2237 reader->resumestate = XmlReadResumeState_STag;
2238 case XmlReadResumeState_STag:
2240 strval qname, prefix, local;
2241 int empty = 0;
2243 /* this handles empty elements too */
2244 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2245 if (FAILED(hr)) return hr;
2247 /* FIXME: need to check for defined namespace to reject invalid prefix */
2249 /* if we got empty element and stack is empty go straight to Misc */
2250 if (empty && list_empty(&reader->elements))
2251 reader->instate = XmlReadInState_MiscEnd;
2252 else
2253 reader->instate = XmlReadInState_Content;
2255 reader->nodetype = XmlNodeType_Element;
2256 reader->resumestate = XmlReadResumeState_Initial;
2257 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2258 reader_set_strvalue(reader, StringValue_LocalName, &local);
2259 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2260 break;
2262 default:
2263 hr = E_FAIL;
2266 return hr;
2269 /* [13 NS] ETag ::= '</' QName S? '>' */
2270 static HRESULT reader_parse_endtag(xmlreader *reader)
2272 strval prefix, local, qname;
2273 struct element *elem;
2274 HRESULT hr;
2276 /* skip '</' */
2277 reader_skipn(reader, 2);
2279 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2280 if (FAILED(hr)) return hr;
2282 reader_skipspaces(reader);
2284 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2286 /* skip '>' */
2287 reader_skipn(reader, 1);
2289 /* Element stack should never be empty at this point, cause we shouldn't get to
2290 content parsing if it's empty. */
2291 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2292 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2294 reader->nodetype = XmlNodeType_EndElement;
2295 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2296 reader_set_strvalue(reader, StringValue_LocalName, &local);
2297 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2299 return S_OK;
2302 /* [18] CDSect ::= CDStart CData CDEnd
2303 [19] CDStart ::= '<![CDATA['
2304 [20] CData ::= (Char* - (Char* ']]>' Char*))
2305 [21] CDEnd ::= ']]>' */
2306 static HRESULT reader_parse_cdata(xmlreader *reader)
2308 WCHAR *ptr;
2309 UINT start;
2311 if (reader->resumestate == XmlReadResumeState_CDATA)
2313 start = reader->resume[XmlReadResume_Body];
2314 ptr = reader_get_ptr(reader);
2316 else
2318 /* skip markup '<![CDATA[' */
2319 reader_skipn(reader, 9);
2320 reader_shrink(reader);
2321 ptr = reader_get_ptr(reader);
2322 start = reader_get_cur(reader);
2323 reader->nodetype = XmlNodeType_CDATA;
2324 reader->resume[XmlReadResume_Body] = start;
2325 reader->resumestate = XmlReadResumeState_CDATA;
2326 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2327 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2328 reader_set_strvalue(reader, StringValue_Value, NULL);
2331 while (*ptr)
2333 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2335 strval value;
2337 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2339 /* skip ']]>' */
2340 reader_skipn(reader, 3);
2341 TRACE("%s\n", debug_strval(reader, &value));
2343 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2344 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2345 reader_set_strvalue(reader, StringValue_Value, &value);
2346 reader->resume[XmlReadResume_Body] = 0;
2347 reader->resumestate = XmlReadResumeState_Initial;
2348 return S_OK;
2350 else
2352 /* Value normalization is not fully implemented, rules are:
2354 - single '\r' -> '\n';
2355 - sequence '\r\n' -> '\n', in this case value length changes;
2357 if (*ptr == '\r') *ptr = '\n';
2358 reader_skipn(reader, 1);
2359 ptr++;
2363 return S_OK;
2366 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2367 static HRESULT reader_parse_chardata(xmlreader *reader)
2369 WCHAR *ptr;
2370 UINT start;
2372 if (reader->resumestate == XmlReadResumeState_CharData)
2374 start = reader->resume[XmlReadResume_Body];
2375 ptr = reader_get_ptr(reader);
2377 else
2379 reader_shrink(reader);
2380 ptr = reader_get_ptr(reader);
2381 start = reader_get_cur(reader);
2382 /* There's no text */
2383 if (!*ptr || *ptr == '<') return S_OK;
2384 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2385 reader->resume[XmlReadResume_Body] = start;
2386 reader->resumestate = XmlReadResumeState_CharData;
2387 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2388 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2389 reader_set_strvalue(reader, StringValue_Value, NULL);
2392 while (*ptr)
2394 static const WCHAR ampW[] = {'&',0};
2396 /* CDATA closing sequence ']]>' is not allowed */
2397 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2398 return WC_E_CDSECTEND;
2400 /* Found next markup part */
2401 if (ptr[0] == '<')
2403 strval value;
2405 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2406 reader_set_strvalue(reader, StringValue_Value, &value);
2407 reader->resume[XmlReadResume_Body] = 0;
2408 reader->resumestate = XmlReadResumeState_Initial;
2409 return S_OK;
2412 /* this covers a case when text has leading whitespace chars */
2413 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2415 if (!reader_cmp(reader, ampW))
2416 reader_parse_reference(reader);
2417 else
2418 reader_skipn(reader, 1);
2420 ptr = reader_get_ptr(reader);
2423 return S_OK;
2426 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2427 static HRESULT reader_parse_content(xmlreader *reader)
2429 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2430 static const WCHAR etagW[] = {'<','/',0};
2432 if (reader->resumestate != XmlReadResumeState_Initial)
2434 switch (reader->resumestate)
2436 case XmlReadResumeState_CDATA:
2437 return reader_parse_cdata(reader);
2438 case XmlReadResumeState_Comment:
2439 return reader_parse_comment(reader);
2440 case XmlReadResumeState_PIBody:
2441 case XmlReadResumeState_PITarget:
2442 return reader_parse_pi(reader);
2443 case XmlReadResumeState_CharData:
2444 return reader_parse_chardata(reader);
2445 default:
2446 ERR("unknown resume state %d\n", reader->resumestate);
2450 reader_shrink(reader);
2452 /* handle end tag here, it indicates end of content as well */
2453 if (!reader_cmp(reader, etagW))
2454 return reader_parse_endtag(reader);
2456 if (!reader_cmp(reader, commentW))
2457 return reader_parse_comment(reader);
2459 if (!reader_cmp(reader, piW))
2460 return reader_parse_pi(reader);
2462 if (!reader_cmp(reader, cdstartW))
2463 return reader_parse_cdata(reader);
2465 if (!reader_cmp(reader, ltW))
2466 return reader_parse_element(reader);
2468 /* what's left must be CharData */
2469 return reader_parse_chardata(reader);
2472 static HRESULT reader_parse_nextnode(xmlreader *reader)
2474 XmlNodeType nodetype = reader_get_nodetype(reader);
2475 HRESULT hr;
2477 if (!is_reader_pending(reader))
2478 reader_clear_attrs(reader);
2480 /* When moving from EndElement or empty element, pop its own namespace definitions */
2481 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2482 reader_pop_ns_nodes(reader, &reader->empty_element);
2483 else if (nodetype == XmlNodeType_EndElement)
2484 reader_pop_element(reader);
2486 while (1)
2488 switch (reader->instate)
2490 /* if it's a first call for a new input we need to detect stream encoding */
2491 case XmlReadInState_Initial:
2493 xml_encoding enc;
2495 hr = readerinput_growraw(reader->input);
2496 if (FAILED(hr)) return hr;
2498 /* try to detect encoding by BOM or data and set input code page */
2499 hr = readerinput_detectencoding(reader->input, &enc);
2500 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2501 debugstr_w(xml_encoding_map[enc].name), hr);
2502 if (FAILED(hr)) return hr;
2504 /* always switch first time cause we have to put something in */
2505 readerinput_switchencoding(reader->input, enc);
2507 /* parse xml declaration */
2508 hr = reader_parse_xmldecl(reader);
2509 if (FAILED(hr)) return hr;
2511 readerinput_shrinkraw(reader->input, -1);
2512 reader->instate = XmlReadInState_Misc_DTD;
2513 if (hr == S_OK) return hr;
2515 break;
2516 case XmlReadInState_Misc_DTD:
2517 hr = reader_parse_misc(reader);
2518 if (FAILED(hr)) return hr;
2520 if (hr == S_FALSE)
2521 reader->instate = XmlReadInState_DTD;
2522 else
2523 return hr;
2524 break;
2525 case XmlReadInState_DTD:
2526 hr = reader_parse_dtd(reader);
2527 if (FAILED(hr)) return hr;
2529 if (hr == S_OK)
2531 reader->instate = XmlReadInState_DTD_Misc;
2532 return hr;
2534 else
2535 reader->instate = XmlReadInState_Element;
2536 break;
2537 case XmlReadInState_DTD_Misc:
2538 hr = reader_parse_misc(reader);
2539 if (FAILED(hr)) return hr;
2541 if (hr == S_FALSE)
2542 reader->instate = XmlReadInState_Element;
2543 else
2544 return hr;
2545 break;
2546 case XmlReadInState_Element:
2547 return reader_parse_element(reader);
2548 case XmlReadInState_Content:
2549 return reader_parse_content(reader);
2550 case XmlReadInState_MiscEnd:
2551 hr = reader_parse_misc(reader);
2552 if (FAILED(hr)) return hr;
2554 if (hr == S_FALSE)
2556 reader->instate = XmlReadInState_Eof;
2557 reader->nodetype = XmlNodeType_None;
2559 return hr;
2560 case XmlReadInState_Eof:
2561 return S_FALSE;
2562 default:
2563 FIXME("internal state %d not handled\n", reader->instate);
2564 return E_NOTIMPL;
2568 return E_NOTIMPL;
2571 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2573 xmlreader *This = impl_from_IXmlReader(iface);
2575 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2577 if (IsEqualGUID(riid, &IID_IUnknown) ||
2578 IsEqualGUID(riid, &IID_IXmlReader))
2580 *ppvObject = iface;
2582 else
2584 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2585 *ppvObject = NULL;
2586 return E_NOINTERFACE;
2589 IXmlReader_AddRef(iface);
2591 return S_OK;
2594 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2596 xmlreader *This = impl_from_IXmlReader(iface);
2597 ULONG ref = InterlockedIncrement(&This->ref);
2598 TRACE("(%p)->(%d)\n", This, ref);
2599 return ref;
2602 static void reader_clear_ns(xmlreader *reader)
2604 struct ns *ns, *ns2;
2606 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2607 reader_free_strvalued(reader, &ns->prefix);
2608 reader_free_strvalued(reader, &ns->uri);
2609 reader_free(reader, ns);
2612 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2613 reader_free_strvalued(reader, &ns->uri);
2614 reader_free(reader, ns);
2618 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2620 xmlreader *This = impl_from_IXmlReader(iface);
2621 LONG ref = InterlockedDecrement(&This->ref);
2623 TRACE("(%p)->(%d)\n", This, ref);
2625 if (ref == 0)
2627 IMalloc *imalloc = This->imalloc;
2628 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2629 if (This->resolver) IXmlResolver_Release(This->resolver);
2630 if (This->mlang) IUnknown_Release(This->mlang);
2631 reader_clear_attrs(This);
2632 reader_clear_ns(This);
2633 reader_clear_elements(This);
2634 reader_free_strvalues(This);
2635 reader_free(This, This);
2636 if (imalloc) IMalloc_Release(imalloc);
2639 return ref;
2642 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2644 xmlreader *This = impl_from_IXmlReader(iface);
2645 IXmlReaderInput *readerinput;
2646 HRESULT hr;
2648 TRACE("(%p)->(%p)\n", This, input);
2650 if (This->input)
2652 readerinput_release_stream(This->input);
2653 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2654 This->input = NULL;
2657 This->line = This->pos = 0;
2658 reader_clear_elements(This);
2659 This->depth = 0;
2660 This->resumestate = XmlReadResumeState_Initial;
2661 memset(This->resume, 0, sizeof(This->resume));
2663 /* just reset current input */
2664 if (!input)
2666 This->state = XmlReadState_Initial;
2667 return S_OK;
2670 /* now try IXmlReaderInput, ISequentialStream, IStream */
2671 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2672 if (hr == S_OK)
2674 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2675 This->input = impl_from_IXmlReaderInput(readerinput);
2676 else
2678 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2679 readerinput, readerinput->lpVtbl);
2680 IUnknown_Release(readerinput);
2681 return E_FAIL;
2686 if (hr != S_OK || !readerinput)
2688 /* create IXmlReaderInput basing on supplied interface */
2689 hr = CreateXmlReaderInputWithEncodingName(input,
2690 This->imalloc, NULL, FALSE, NULL, &readerinput);
2691 if (hr != S_OK) return hr;
2692 This->input = impl_from_IXmlReaderInput(readerinput);
2695 /* set stream for supplied IXmlReaderInput */
2696 hr = readerinput_query_for_stream(This->input);
2697 if (hr == S_OK)
2699 This->state = XmlReadState_Initial;
2700 This->instate = XmlReadInState_Initial;
2703 return hr;
2706 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2708 xmlreader *This = impl_from_IXmlReader(iface);
2710 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2712 if (!value) return E_INVALIDARG;
2714 switch (property)
2716 case XmlReaderProperty_MultiLanguage:
2717 *value = (LONG_PTR)This->mlang;
2718 if (This->mlang)
2719 IUnknown_AddRef(This->mlang);
2720 break;
2721 case XmlReaderProperty_XmlResolver:
2722 *value = (LONG_PTR)This->resolver;
2723 if (This->resolver)
2724 IXmlResolver_AddRef(This->resolver);
2725 break;
2726 case XmlReaderProperty_DtdProcessing:
2727 *value = This->dtdmode;
2728 break;
2729 case XmlReaderProperty_ReadState:
2730 *value = This->state;
2731 break;
2732 default:
2733 FIXME("Unimplemented property (%u)\n", property);
2734 return E_NOTIMPL;
2737 return S_OK;
2740 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2742 xmlreader *This = impl_from_IXmlReader(iface);
2744 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2746 switch (property)
2748 case XmlReaderProperty_MultiLanguage:
2749 if (This->mlang)
2750 IUnknown_Release(This->mlang);
2751 This->mlang = (IUnknown*)value;
2752 if (This->mlang)
2753 IUnknown_AddRef(This->mlang);
2754 if (This->mlang)
2755 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2756 break;
2757 case XmlReaderProperty_XmlResolver:
2758 if (This->resolver)
2759 IXmlResolver_Release(This->resolver);
2760 This->resolver = (IXmlResolver*)value;
2761 if (This->resolver)
2762 IXmlResolver_AddRef(This->resolver);
2763 break;
2764 case XmlReaderProperty_DtdProcessing:
2765 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2766 This->dtdmode = value;
2767 break;
2768 case XmlReaderProperty_MaxElementDepth:
2769 FIXME("Ignoring MaxElementDepth %ld\n", value);
2770 break;
2771 default:
2772 FIXME("Unimplemented property (%u)\n", property);
2773 return E_NOTIMPL;
2776 return S_OK;
2779 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2781 xmlreader *This = impl_from_IXmlReader(iface);
2782 XmlNodeType oldtype = This->nodetype;
2783 HRESULT hr;
2785 TRACE("(%p)->(%p)\n", This, nodetype);
2787 if (This->state == XmlReadState_Closed) return S_FALSE;
2789 hr = reader_parse_nextnode(This);
2790 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2791 This->state = XmlReadState_Interactive;
2793 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2794 if (nodetype)
2795 *nodetype = This->nodetype;
2797 return hr;
2800 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2802 xmlreader *This = impl_from_IXmlReader(iface);
2804 TRACE("(%p)->(%p)\n", This, node_type);
2806 if (!node_type)
2807 return E_INVALIDARG;
2809 *node_type = reader_get_nodetype(This);
2810 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2813 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2815 if (!reader->attr_count)
2816 return S_FALSE;
2818 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2819 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2820 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2821 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2823 return S_OK;
2826 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2828 xmlreader *This = impl_from_IXmlReader(iface);
2830 TRACE("(%p)\n", This);
2832 return reader_move_to_first_attribute(This);
2835 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2837 xmlreader *This = impl_from_IXmlReader(iface);
2838 const struct list *next;
2840 TRACE("(%p)\n", This);
2842 if (!This->attr_count) return S_FALSE;
2844 if (!This->attr)
2845 return reader_move_to_first_attribute(This);
2847 next = list_next(&This->attrs, &This->attr->entry);
2848 if (next)
2850 This->attr = LIST_ENTRY(next, struct attribute, entry);
2851 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2852 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2853 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2856 return next ? S_OK : S_FALSE;
2859 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2860 LPCWSTR local_name,
2861 LPCWSTR namespaceUri)
2863 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2864 return E_NOTIMPL;
2867 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2869 xmlreader *This = impl_from_IXmlReader(iface);
2871 TRACE("(%p)\n", This);
2873 if (!This->attr_count) return S_FALSE;
2874 This->attr = NULL;
2876 /* FIXME: support other node types with 'attributes' like DTD */
2877 if (This->is_empty_element) {
2878 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2879 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2881 else {
2882 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2883 if (element) {
2884 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2885 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2889 return S_OK;
2892 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2894 xmlreader *This = impl_from_IXmlReader(iface);
2896 TRACE("(%p)->(%p %p)\n", This, name, len);
2897 *name = This->strvalues[StringValue_QualifiedName].str;
2898 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2899 return S_OK;
2902 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2904 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2905 struct ns *ns;
2907 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2908 if (strval_eq(reader, prefix, &ns->prefix))
2909 return ns;
2912 return NULL;
2915 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2917 if (list_empty(&reader->nsdef))
2918 return NULL;
2920 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2923 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2925 xmlreader *This = impl_from_IXmlReader(iface);
2926 const strval *prefix = &This->strvalues[StringValue_Prefix];
2927 XmlNodeType nodetype;
2928 struct ns *ns;
2929 UINT length;
2931 TRACE("(%p %p %p)\n", iface, uri, len);
2933 if (!len)
2934 len = &length;
2936 *uri = NULL;
2937 *len = 0;
2939 switch ((nodetype = reader_get_nodetype(This)))
2941 case XmlNodeType_Attribute:
2943 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2944 '2','0','0','0','/','x','m','l','n','s','/',0};
2945 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2946 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2947 const strval *local = &This->strvalues[StringValue_LocalName];
2949 /* check for reserved prefixes first */
2950 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2951 strval_eq(This, prefix, &strval_xmlns))
2953 *uri = xmlns_uriW;
2954 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2956 else if (strval_eq(This, prefix, &strval_xml)) {
2957 *uri = xml_uriW;
2958 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2961 if (!*uri) {
2962 ns = reader_lookup_ns(This, prefix);
2963 if (ns) {
2964 *uri = ns->uri.str;
2965 *len = ns->uri.len;
2967 else {
2968 *uri = emptyW;
2969 *len = 0;
2973 break;
2974 case XmlNodeType_Element:
2975 case XmlNodeType_EndElement:
2977 ns = reader_lookup_ns(This, prefix);
2979 /* pick top default ns if any */
2980 if (!ns)
2981 ns = reader_lookup_nsdef(This);
2983 if (ns) {
2984 *uri = ns->uri.str;
2985 *len = ns->uri.len;
2987 else {
2988 *uri = emptyW;
2989 *len = 0;
2992 break;
2993 default:
2994 FIXME("Unhandled node type %d\n", nodetype);
2995 return E_NOTIMPL;
2998 return S_OK;
3001 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3003 xmlreader *This = impl_from_IXmlReader(iface);
3005 TRACE("(%p)->(%p %p)\n", This, name, len);
3006 *name = This->strvalues[StringValue_LocalName].str;
3007 if (len) *len = This->strvalues[StringValue_LocalName].len;
3008 return S_OK;
3011 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
3013 xmlreader *This = impl_from_IXmlReader(iface);
3015 TRACE("(%p)->(%p %p)\n", This, prefix, len);
3016 *prefix = This->strvalues[StringValue_Prefix].str;
3017 if (len) *len = This->strvalues[StringValue_Prefix].len;
3018 return S_OK;
3021 static BOOL is_namespace_definition(xmlreader *reader)
3023 const strval *local = &reader->strvalues[StringValue_LocalName];
3024 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3026 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3027 return FALSE;
3029 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3030 strval_eq(reader, prefix, &strval_xmlns));
3033 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3035 xmlreader *reader = impl_from_IXmlReader(iface);
3036 strval *val = &reader->strvalues[StringValue_Value];
3038 TRACE("(%p)->(%p %p)\n", reader, value, len);
3040 *value = NULL;
3042 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3044 XmlNodeType type;
3045 HRESULT hr;
3047 hr = IXmlReader_Read(iface, &type);
3048 if (FAILED(hr)) return hr;
3050 /* return if still pending, partially read values are not reported */
3051 if (is_reader_pending(reader)) return E_PENDING;
3054 if (!val->str)
3056 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3057 if (!ptr) return E_OUTOFMEMORY;
3058 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3059 ptr[val->len] = 0;
3060 val->str = ptr;
3063 /* For namespace definition attributes return values from namespace list */
3064 if (is_namespace_definition(reader)) {
3065 const strval *local = &reader->strvalues[StringValue_LocalName];
3066 struct ns *ns;
3068 ns = reader_lookup_ns(reader, local);
3069 if (!ns)
3070 ns = reader_lookup_nsdef(reader);
3072 val = &ns->uri;
3075 *value = val->str;
3076 if (len) *len = val->len;
3077 return S_OK;
3080 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3082 xmlreader *reader = impl_from_IXmlReader(iface);
3083 strval *val = &reader->strvalues[StringValue_Value];
3084 UINT len;
3086 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3088 /* Value is already allocated, chunked reads are not possible. */
3089 if (val->str) return S_FALSE;
3091 if (val->len)
3093 len = min(chunk_size, val->len);
3094 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3095 val->start += len;
3096 val->len -= len;
3097 if (read) *read = len;
3100 return S_OK;
3103 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3104 LPCWSTR *baseUri,
3105 UINT *baseUri_length)
3107 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3108 return E_NOTIMPL;
3111 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3113 FIXME("(%p): stub\n", iface);
3114 return FALSE;
3117 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3119 xmlreader *This = impl_from_IXmlReader(iface);
3120 TRACE("(%p)\n", This);
3121 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3122 when current node is start tag of an element */
3123 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3126 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3128 xmlreader *This = impl_from_IXmlReader(iface);
3130 TRACE("(%p %p)\n", This, lineNumber);
3132 if (!lineNumber) return E_INVALIDARG;
3134 *lineNumber = This->line;
3136 return S_OK;
3139 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3141 xmlreader *This = impl_from_IXmlReader(iface);
3143 TRACE("(%p %p)\n", This, linePosition);
3145 if (!linePosition) return E_INVALIDARG;
3147 *linePosition = This->pos;
3149 return S_OK;
3152 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3154 xmlreader *This = impl_from_IXmlReader(iface);
3156 TRACE("(%p)->(%p)\n", This, count);
3158 if (!count) return E_INVALIDARG;
3160 *count = This->attr_count;
3161 return S_OK;
3164 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3166 xmlreader *This = impl_from_IXmlReader(iface);
3167 TRACE("(%p)->(%p)\n", This, depth);
3168 *depth = This->depth;
3169 return S_OK;
3172 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3174 FIXME("(%p): stub\n", iface);
3175 return FALSE;
3178 static const struct IXmlReaderVtbl xmlreader_vtbl =
3180 xmlreader_QueryInterface,
3181 xmlreader_AddRef,
3182 xmlreader_Release,
3183 xmlreader_SetInput,
3184 xmlreader_GetProperty,
3185 xmlreader_SetProperty,
3186 xmlreader_Read,
3187 xmlreader_GetNodeType,
3188 xmlreader_MoveToFirstAttribute,
3189 xmlreader_MoveToNextAttribute,
3190 xmlreader_MoveToAttributeByName,
3191 xmlreader_MoveToElement,
3192 xmlreader_GetQualifiedName,
3193 xmlreader_GetNamespaceUri,
3194 xmlreader_GetLocalName,
3195 xmlreader_GetPrefix,
3196 xmlreader_GetValue,
3197 xmlreader_ReadValueChunk,
3198 xmlreader_GetBaseUri,
3199 xmlreader_IsDefault,
3200 xmlreader_IsEmptyElement,
3201 xmlreader_GetLineNumber,
3202 xmlreader_GetLinePosition,
3203 xmlreader_GetAttributeCount,
3204 xmlreader_GetDepth,
3205 xmlreader_IsEOF
3208 /** IXmlReaderInput **/
3209 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3211 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3213 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3215 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3216 IsEqualGUID(riid, &IID_IUnknown))
3218 *ppvObject = iface;
3220 else
3222 WARN("interface %s not implemented\n", debugstr_guid(riid));
3223 *ppvObject = NULL;
3224 return E_NOINTERFACE;
3227 IUnknown_AddRef(iface);
3229 return S_OK;
3232 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3234 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3235 ULONG ref = InterlockedIncrement(&This->ref);
3236 TRACE("(%p)->(%d)\n", This, ref);
3237 return ref;
3240 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3242 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3243 LONG ref = InterlockedDecrement(&This->ref);
3245 TRACE("(%p)->(%d)\n", This, ref);
3247 if (ref == 0)
3249 IMalloc *imalloc = This->imalloc;
3250 if (This->input) IUnknown_Release(This->input);
3251 if (This->stream) ISequentialStream_Release(This->stream);
3252 if (This->buffer) free_input_buffer(This->buffer);
3253 readerinput_free(This, This->baseuri);
3254 readerinput_free(This, This);
3255 if (imalloc) IMalloc_Release(imalloc);
3258 return ref;
3261 static const struct IUnknownVtbl xmlreaderinputvtbl =
3263 xmlreaderinput_QueryInterface,
3264 xmlreaderinput_AddRef,
3265 xmlreaderinput_Release
3268 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3270 xmlreader *reader;
3271 int i;
3273 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3275 if (!IsEqualGUID(riid, &IID_IXmlReader))
3277 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3278 return E_FAIL;
3281 if (imalloc)
3282 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3283 else
3284 reader = heap_alloc(sizeof(*reader));
3285 if(!reader) return E_OUTOFMEMORY;
3287 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3288 reader->ref = 1;
3289 reader->input = NULL;
3290 reader->state = XmlReadState_Closed;
3291 reader->instate = XmlReadInState_Initial;
3292 reader->resumestate = XmlReadResumeState_Initial;
3293 reader->dtdmode = DtdProcessing_Prohibit;
3294 reader->resolver = NULL;
3295 reader->mlang = NULL;
3296 reader->line = reader->pos = 0;
3297 reader->imalloc = imalloc;
3298 if (imalloc) IMalloc_AddRef(imalloc);
3299 reader->nodetype = XmlNodeType_None;
3300 list_init(&reader->attrs);
3301 reader->attr_count = 0;
3302 reader->attr = NULL;
3303 list_init(&reader->nsdef);
3304 list_init(&reader->ns);
3305 list_init(&reader->elements);
3306 reader->depth = 0;
3307 reader->max_depth = 256;
3308 reader->is_empty_element = FALSE;
3309 memset(reader->resume, 0, sizeof(reader->resume));
3311 for (i = 0; i < StringValue_Last; i++)
3312 reader->strvalues[i] = strval_empty;
3314 *obj = &reader->IXmlReader_iface;
3316 TRACE("returning iface %p\n", *obj);
3318 return S_OK;
3321 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3322 IMalloc *imalloc,
3323 LPCWSTR encoding,
3324 BOOL hint,
3325 LPCWSTR base_uri,
3326 IXmlReaderInput **ppInput)
3328 xmlreaderinput *readerinput;
3329 HRESULT hr;
3331 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3332 hint, wine_dbgstr_w(base_uri), ppInput);
3334 if (!stream || !ppInput) return E_INVALIDARG;
3336 if (imalloc)
3337 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3338 else
3339 readerinput = heap_alloc(sizeof(*readerinput));
3340 if(!readerinput) return E_OUTOFMEMORY;
3342 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3343 readerinput->ref = 1;
3344 readerinput->imalloc = imalloc;
3345 readerinput->stream = NULL;
3346 if (imalloc) IMalloc_AddRef(imalloc);
3347 readerinput->encoding = parse_encoding_name(encoding, -1);
3348 readerinput->hint = hint;
3349 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3350 readerinput->pending = 0;
3352 hr = alloc_input_buffer(readerinput);
3353 if (hr != S_OK)
3355 readerinput_free(readerinput, readerinput->baseuri);
3356 readerinput_free(readerinput, readerinput);
3357 if (imalloc) IMalloc_Release(imalloc);
3358 return hr;
3360 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3362 *ppInput = &readerinput->IXmlReaderInput_iface;
3364 TRACE("returning iface %p\n", *ppInput);
3366 return S_OK;