Release 2.9.
[wine.git] / dlls / xmllite / reader.c
blob7f127f614f23104564ba66897b33f9c88d298f3e
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
35 #include "wine/unicode.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
39 /* not defined in public headers */
40 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parser state of the reader (stored in xmlreader.instate). */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    /* optional Misc at the end of a document */
    XmlReadInState_MiscEnd,
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records the construct that was being parsed when an input problem
   interrupted the reader; parsing is resumed based on this value. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into xmlreader.resume[], the saved input offsets used to resume
   from a particular input position. */
typedef enum
{
    /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Name = 0,
    /* local for QName */
    XmlReadResume_Local,
    /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Body,
    /* number of slots, not a real index */
    XmlReadResume_Last
} XmlReaderResume;
/* Indices into xmlreader.strvalues[]. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    /* number of slots, not a real index */
    StringValue_Last
} XmlReaderStringValue;
87 static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
88 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
89 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
91 static const WCHAR dblquoteW[] = {'\"',0};
92 static const WCHAR quoteW[] = {'\'',0};
93 static const WCHAR ltW[] = {'<',0};
94 static const WCHAR gtW[] = {'>',0};
95 static const WCHAR commentW[] = {'<','!','-','-',0};
96 static const WCHAR piW[] = {'<','?',0};
98 static BOOL is_namestartchar(WCHAR ch);
100 static const char *debugstr_nodetype(XmlNodeType nodetype)
102 static const char * const type_names[] =
104 "None",
105 "Element",
106 "Attribute",
107 "Text",
108 "CDATA",
111 "ProcessingInstruction",
112 "Comment",
114 "DocumentType",
117 "Whitespace",
119 "EndElement",
121 "XmlDeclaration"
124 if (nodetype > _XmlNodeType_Last)
125 return wine_dbg_sprintf("unknown type=%d", nodetype);
127 return type_names[nodetype];
130 static const char *debugstr_reader_prop(XmlReaderProperty prop)
132 static const char * const prop_names[] =
134 "MultiLanguage",
135 "ConformanceLevel",
136 "RandomAccess",
137 "XmlResolver",
138 "DtdProcessing",
139 "ReadState",
140 "MaxElementDepth",
141 "MaxEntityExpansion"
144 if (prop > _XmlReaderProperty_Last)
145 return wine_dbg_sprintf("unknown property=%d", prop);
147 return prop_names[prop];
150 struct xml_encoding_data
152 const WCHAR *name;
153 xml_encoding enc;
154 UINT cp;
157 static const struct xml_encoding_data xml_encoding_map[] = {
158 { usasciiW, XmlEncoding_USASCII, 20127 },
159 { utf16W, XmlEncoding_UTF16, ~0 },
160 { utf8W, XmlEncoding_UTF8, CP_UTF8 },
163 const WCHAR *get_encoding_name(xml_encoding encoding)
165 return xml_encoding_map[encoding].name;
168 xml_encoding get_encoding_from_codepage(UINT codepage)
170 int i;
171 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
173 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
175 return XmlEncoding_Unknown;
178 typedef struct
180 char *data;
181 UINT cur;
182 unsigned int allocated;
183 unsigned int written;
184 BOOL prev_cr;
185 } encoded_buffer;
187 typedef struct input_buffer input_buffer;
189 typedef struct
191 IXmlReaderInput IXmlReaderInput_iface;
192 LONG ref;
193 /* reference passed on IXmlReaderInput creation, is kept when input is created */
194 IUnknown *input;
195 IMalloc *imalloc;
196 xml_encoding encoding;
197 BOOL hint;
198 WCHAR *baseuri;
199 /* stream reference set after SetInput() call from reader,
200 stored as sequential stream, cause currently
201 optimizations possible with IStream aren't implemented */
202 ISequentialStream *stream;
203 input_buffer *buffer;
204 unsigned int pending : 1;
205 } xmlreaderinput;
207 static const struct IUnknownVtbl xmlreaderinputvtbl;
209 /* Structure to hold parsed string of specific length.
211 Reader stores node value as 'start' pointer, on request
212 a null-terminated version of it is allocated.
214 To init a strval variable use reader_init_strval(),
215 to set strval as a reader value use reader_set_strval().
217 typedef struct
219 WCHAR *str; /* allocated null-terminated string */
220 UINT len; /* length in WCHARs, altered after ReadValueChunk */
221 UINT start; /* input position where value starts */
222 } strval;
224 static WCHAR emptyW[] = {0};
225 static WCHAR xmlW[] = {'x','m','l',0};
226 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
227 static const strval strval_empty = { emptyW };
228 static const strval strval_xml = { xmlW, 3 };
229 static const strval strval_xmlns = { xmlnsW, 5 };
231 struct reader_position
233 UINT line_number;
234 UINT line_position;
/* Bit flags stored in struct attribute.flags. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION         = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
243 struct attribute
245 struct list entry;
246 strval prefix;
247 strval localname;
248 strval qname;
249 strval value;
250 struct reader_position position;
251 unsigned int flags;
254 struct element
256 struct list entry;
257 strval prefix;
258 strval localname;
259 strval qname;
260 struct reader_position position;
263 struct ns
265 struct list entry;
266 strval prefix;
267 strval uri;
268 struct element *element;
271 typedef struct
273 IXmlReader IXmlReader_iface;
274 LONG ref;
275 xmlreaderinput *input;
276 IMalloc *imalloc;
277 XmlReadState state;
278 HRESULT error; /* error set on XmlReadState_Error */
279 XmlReaderInternalState instate;
280 XmlReaderResumeState resumestate;
281 XmlNodeType nodetype;
282 DtdProcessing dtdmode;
283 IXmlResolver *resolver;
284 IUnknown *mlang;
285 struct reader_position position;
286 struct list attrs; /* attributes list for current node */
287 struct attribute *attr; /* current attribute */
288 UINT attr_count;
289 struct list nsdef;
290 struct list ns;
291 struct list elements;
292 int chunk_read_off;
293 strval strvalues[StringValue_Last];
294 UINT depth;
295 UINT max_depth;
296 BOOL is_empty_element;
297 struct element empty_element; /* used for empty elements without end tag <a />,
298 and to keep <?xml reader position */
299 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
300 } xmlreader;
302 struct input_buffer
304 encoded_buffer utf16;
305 encoded_buffer encoded;
306 UINT code_page;
307 xmlreaderinput *input;
310 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
312 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
315 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
317 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
320 /* reader memory allocation functions */
321 static inline void *reader_alloc(xmlreader *reader, size_t len)
323 return m_alloc(reader->imalloc, len);
326 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
328 void *ret = reader_alloc(reader, len);
329 if (ret)
330 memset(ret, 0, len);
331 return ret;
334 static inline void reader_free(xmlreader *reader, void *mem)
336 m_free(reader->imalloc, mem);
339 /* Just return pointer from offset, no attempt to read more. */
340 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
342 encoded_buffer *buffer = &reader->input->buffer->utf16;
343 return (WCHAR*)buffer->data + offset;
346 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
348 return v->str ? v->str : reader_get_ptr2(reader, v->start);
351 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
353 *dest = *src;
355 if (src->str != strval_empty.str)
357 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
358 if (!dest->str) return E_OUTOFMEMORY;
359 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
360 dest->str[dest->len] = 0;
361 dest->start = 0;
364 return S_OK;
367 /* reader input memory allocation functions */
368 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
370 return m_alloc(input->imalloc, len);
373 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
375 return m_realloc(input->imalloc, mem, len);
378 static inline void readerinput_free(xmlreaderinput *input, void *mem)
380 m_free(input->imalloc, mem);
383 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
385 LPWSTR ret = NULL;
387 if(str) {
388 DWORD size;
390 size = (strlenW(str)+1)*sizeof(WCHAR);
391 ret = readerinput_alloc(input, size);
392 if (ret) memcpy(ret, str, size);
395 return ret;
398 /* This one frees stored string value if needed */
399 static void reader_free_strvalued(xmlreader *reader, strval *v)
401 if (v->str != strval_empty.str)
403 reader_free(reader, v->str);
404 *v = strval_empty;
408 static void reader_clear_attrs(xmlreader *reader)
410 struct attribute *attr, *attr2;
411 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
413 reader_free_strvalued(reader, &attr->localname);
414 reader_free_strvalued(reader, &attr->value);
415 reader_free(reader, attr);
417 list_init(&reader->attrs);
418 reader->attr_count = 0;
419 reader->attr = NULL;
422 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
423 while we are on a node with attributes */
424 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
425 strval *value, const struct reader_position *position, unsigned int flags)
427 struct attribute *attr;
428 HRESULT hr;
430 attr = reader_alloc(reader, sizeof(*attr));
431 if (!attr) return E_OUTOFMEMORY;
433 hr = reader_strvaldup(reader, localname, &attr->localname);
434 if (hr == S_OK)
436 hr = reader_strvaldup(reader, value, &attr->value);
437 if (hr != S_OK)
438 reader_free_strvalued(reader, &attr->value);
440 if (hr != S_OK)
442 reader_free(reader, attr);
443 return hr;
446 if (prefix)
447 attr->prefix = *prefix;
448 else
449 memset(&attr->prefix, 0, sizeof(attr->prefix));
450 attr->qname = qname ? *qname : *localname;
451 attr->position = *position;
452 attr->flags = flags;
453 list_add_tail(&reader->attrs, &attr->entry);
454 reader->attr_count++;
456 return S_OK;
459 /* Returns current element, doesn't check if reader is actually positioned on it. */
460 static struct element *reader_get_element(xmlreader *reader)
462 if (reader->is_empty_element)
463 return &reader->empty_element;
465 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
468 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
470 v->start = start;
471 v->len = len;
472 v->str = NULL;
475 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
477 return debugstr_wn(reader_get_strptr(reader, v), v->len);
480 /* used to initialize from constant string */
481 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
483 v->start = 0;
484 v->len = len;
485 v->str = str;
488 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
490 reader_free_strvalued(reader, &reader->strvalues[type]);
493 static void reader_free_strvalues(xmlreader *reader)
495 int type;
496 for (type = 0; type < StringValue_Last; type++)
497 reader_free_strvalue(reader, type);
500 /* This helper should only be used to test if strings are the same,
501 it doesn't try to sort. */
502 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
504 if (str1->len != str2->len) return 0;
505 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
508 static void reader_clear_elements(xmlreader *reader)
510 struct element *elem, *elem2;
511 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
513 reader_free_strvalued(reader, &elem->prefix);
514 reader_free_strvalued(reader, &elem->localname);
515 reader_free_strvalued(reader, &elem->qname);
516 reader_free(reader, elem);
518 list_init(&reader->elements);
519 reader_free_strvalued(reader, &reader->empty_element.localname);
520 reader_free_strvalued(reader, &reader->empty_element.qname);
521 reader->is_empty_element = FALSE;
524 static HRESULT reader_inc_depth(xmlreader *reader)
526 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
529 static void reader_dec_depth(xmlreader *reader)
531 if (reader->depth)
532 reader->depth--;
535 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
537 struct ns *ns;
538 HRESULT hr;
540 ns = reader_alloc(reader, sizeof(*ns));
541 if (!ns) return E_OUTOFMEMORY;
543 if (def)
544 memset(&ns->prefix, 0, sizeof(ns->prefix));
545 else {
546 hr = reader_strvaldup(reader, prefix, &ns->prefix);
547 if (FAILED(hr)) {
548 reader_free(reader, ns);
549 return hr;
553 hr = reader_strvaldup(reader, uri, &ns->uri);
554 if (FAILED(hr)) {
555 reader_free_strvalued(reader, &ns->prefix);
556 reader_free(reader, ns);
557 return hr;
560 ns->element = NULL;
561 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
562 return hr;
565 static void reader_free_element(xmlreader *reader, struct element *element)
567 reader_free_strvalued(reader, &element->prefix);
568 reader_free_strvalued(reader, &element->localname);
569 reader_free_strvalued(reader, &element->qname);
570 reader_free(reader, element);
573 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
575 struct ns *ns;
577 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
578 if (ns->element)
579 break;
580 ns->element = element;
583 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
584 if (ns->element)
585 break;
586 ns->element = element;
590 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
591 strval *qname, const struct reader_position *position)
593 struct element *element;
594 HRESULT hr;
596 element = reader_alloc_zero(reader, sizeof(*element));
597 if (!element)
598 return E_OUTOFMEMORY;
600 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
601 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
602 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
604 list_add_head(&reader->elements, &element->entry);
605 reader_mark_ns_nodes(reader, element);
606 reader->is_empty_element = FALSE;
607 element->position = *position;
609 else
610 reader_free_element(reader, element);
612 return hr;
615 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
617 struct ns *ns, *ns2;
619 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
620 if (ns->element != element)
621 break;
623 list_remove(&ns->entry);
624 reader_free_strvalued(reader, &ns->prefix);
625 reader_free_strvalued(reader, &ns->uri);
626 reader_free(reader, ns);
629 if (!list_empty(&reader->nsdef)) {
630 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
631 if (ns->element == element) {
632 list_remove(&ns->entry);
633 reader_free_strvalued(reader, &ns->prefix);
634 reader_free_strvalued(reader, &ns->uri);
635 reader_free(reader, ns);
640 static void reader_pop_element(xmlreader *reader)
642 struct element *element;
644 if (list_empty(&reader->elements))
645 return;
647 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
648 list_remove(&element->entry);
650 reader_pop_ns_nodes(reader, element);
651 reader_free_element(reader, element);
653 /* It was a root element, the rest is expected as Misc */
654 if (list_empty(&reader->elements))
655 reader->instate = XmlReadInState_MiscEnd;
658 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
659 means node value is to be determined. */
660 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
662 strval *v = &reader->strvalues[type];
664 reader_free_strvalue(reader, type);
665 if (!value)
667 v->str = NULL;
668 v->start = 0;
669 v->len = 0;
670 return;
673 if (value->str == strval_empty.str)
674 *v = *value;
675 else
677 if (type == StringValue_Value)
679 /* defer allocation for value string */
680 v->str = NULL;
681 v->start = value->start;
682 v->len = value->len;
684 else
686 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
687 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
688 v->str[value->len] = 0;
689 v->len = value->len;
694 static inline int is_reader_pending(xmlreader *reader)
696 return reader->input->pending;
699 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
701 const int initial_len = 0x2000;
702 buffer->data = readerinput_alloc(input, initial_len);
703 if (!buffer->data) return E_OUTOFMEMORY;
705 memset(buffer->data, 0, 4);
706 buffer->cur = 0;
707 buffer->allocated = initial_len;
708 buffer->written = 0;
709 buffer->prev_cr = FALSE;
711 return S_OK;
714 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
716 readerinput_free(input, buffer->data);
719 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
721 if (encoding == XmlEncoding_Unknown)
723 FIXME("unsupported encoding %d\n", encoding);
724 return E_NOTIMPL;
727 *cp = xml_encoding_map[encoding].cp;
729 return S_OK;
732 xml_encoding parse_encoding_name(const WCHAR *name, int len)
734 int min, max, n, c;
736 if (!name) return XmlEncoding_Unknown;
738 min = 0;
739 max = sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]) - 1;
741 while (min <= max)
743 n = (min+max)/2;
745 if (len != -1)
746 c = strncmpiW(xml_encoding_map[n].name, name, len);
747 else
748 c = strcmpiW(xml_encoding_map[n].name, name);
749 if (!c)
750 return xml_encoding_map[n].enc;
752 if (c > 0)
753 max = n-1;
754 else
755 min = n+1;
758 return XmlEncoding_Unknown;
761 static HRESULT alloc_input_buffer(xmlreaderinput *input)
763 input_buffer *buffer;
764 HRESULT hr;
766 input->buffer = NULL;
768 buffer = readerinput_alloc(input, sizeof(*buffer));
769 if (!buffer) return E_OUTOFMEMORY;
771 buffer->input = input;
772 buffer->code_page = ~0; /* code page is unknown at this point */
773 hr = init_encoded_buffer(input, &buffer->utf16);
774 if (hr != S_OK) {
775 readerinput_free(input, buffer);
776 return hr;
779 hr = init_encoded_buffer(input, &buffer->encoded);
780 if (hr != S_OK) {
781 free_encoded_buffer(input, &buffer->utf16);
782 readerinput_free(input, buffer);
783 return hr;
786 input->buffer = buffer;
787 return S_OK;
790 static void free_input_buffer(input_buffer *buffer)
792 free_encoded_buffer(buffer->input, &buffer->encoded);
793 free_encoded_buffer(buffer->input, &buffer->utf16);
794 readerinput_free(buffer->input, buffer);
797 static void readerinput_release_stream(xmlreaderinput *readerinput)
799 if (readerinput->stream) {
800 ISequentialStream_Release(readerinput->stream);
801 readerinput->stream = NULL;
805 /* Queries already stored interface for IStream/ISequentialStream.
806 Interface supplied on creation will be overwritten */
807 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
809 HRESULT hr;
811 readerinput_release_stream(readerinput);
812 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
813 if (hr != S_OK)
814 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
816 return hr;
819 /* reads a chunk to raw buffer */
820 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
822 encoded_buffer *buffer = &readerinput->buffer->encoded;
823 /* to make sure aligned length won't exceed allocated length */
824 ULONG len = buffer->allocated - buffer->written - 4;
825 ULONG read;
826 HRESULT hr;
828 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
829 variable width encodings like UTF-8 */
830 len = (len + 3) & ~3;
831 /* try to use allocated space or grow */
832 if (buffer->allocated - buffer->written < len)
834 buffer->allocated *= 2;
835 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
836 len = buffer->allocated - buffer->written;
839 read = 0;
840 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
841 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
842 readerinput->pending = hr == E_PENDING;
843 if (FAILED(hr)) return hr;
844 buffer->written += read;
846 return hr;
849 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
850 static void readerinput_grow(xmlreaderinput *readerinput, int length)
852 encoded_buffer *buffer = &readerinput->buffer->utf16;
854 length *= sizeof(WCHAR);
855 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
856 if (buffer->allocated < buffer->written + length + 4)
858 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
859 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
860 buffer->allocated = grown_size;
864 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
866 static const char startA[] = {'<','?'};
867 static const char commentA[] = {'<','!'};
868 encoded_buffer *buffer = &readerinput->buffer->encoded;
869 unsigned char *ptr = (unsigned char*)buffer->data;
871 return !memcmp(buffer->data, startA, sizeof(startA)) ||
872 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
873 /* test start byte */
874 (ptr[0] == '<' &&
876 (ptr[1] && (ptr[1] <= 0x7f)) ||
877 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
878 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
879 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
883 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
885 encoded_buffer *buffer = &readerinput->buffer->encoded;
886 static const char utf8bom[] = {0xef,0xbb,0xbf};
887 static const char utf16lebom[] = {0xff,0xfe};
888 WCHAR *ptrW;
890 *enc = XmlEncoding_Unknown;
892 if (buffer->written <= 3)
894 HRESULT hr = readerinput_growraw(readerinput);
895 if (FAILED(hr)) return hr;
896 if (buffer->written < 3) return MX_E_INPUTEND;
899 ptrW = (WCHAR *)buffer->data;
900 /* try start symbols if we have enough data to do that, input buffer should contain
901 first chunk already */
902 if (readerinput_is_utf8(readerinput))
903 *enc = XmlEncoding_UTF8;
904 else if (*ptrW == '<')
906 ptrW++;
907 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
908 *enc = XmlEncoding_UTF16;
910 /* try with BOM now */
911 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
913 buffer->cur += sizeof(utf8bom);
914 *enc = XmlEncoding_UTF8;
916 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
918 buffer->cur += sizeof(utf16lebom);
919 *enc = XmlEncoding_UTF16;
922 return S_OK;
925 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
927 encoded_buffer *buffer = &readerinput->buffer->encoded;
928 int len = buffer->written;
930 /* complete single byte char */
931 if (!(buffer->data[len-1] & 0x80)) return len;
933 /* find start byte of multibyte char */
934 while (--len && !(buffer->data[len] & 0xc0))
937 return len;
940 /* Returns byte length of complete char sequence for buffer code page,
941 it's relative to current buffer position which is currently used for BOM handling
942 only. */
943 static int readerinput_get_convlen(xmlreaderinput *readerinput)
945 encoded_buffer *buffer = &readerinput->buffer->encoded;
946 int len;
948 if (readerinput->buffer->code_page == CP_UTF8)
949 len = readerinput_get_utf8_convlen(readerinput);
950 else
951 len = buffer->written;
953 TRACE("%d\n", len - buffer->cur);
954 return len - buffer->cur;
957 /* It's possible that raw buffer has some leftovers from last conversion - some char
958 sequence that doesn't represent a full code point. Length argument should be calculated with
959 readerinput_get_convlen(), if it's -1 it will be calculated here. */
960 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
962 encoded_buffer *buffer = &readerinput->buffer->encoded;
964 if (len == -1)
965 len = readerinput_get_convlen(readerinput);
967 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
968 /* everything below cur is lost too */
969 buffer->written -= len + buffer->cur;
970 /* after this point we don't need cur offset really,
971 it's used only to mark where actual data begins when first chunk is read */
972 buffer->cur = 0;
975 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
977 BOOL prev_cr = buffer->prev_cr;
978 const WCHAR *src;
979 WCHAR *dest;
981 src = dest = (WCHAR*)buffer->data + off;
982 while ((const char*)src < buffer->data + buffer->written)
984 if (*src == '\r')
986 *dest++ = '\n';
987 src++;
988 prev_cr = TRUE;
989 continue;
991 if(prev_cr && *src == '\n')
992 src++;
993 else
994 *dest++ = *src++;
995 prev_cr = FALSE;
998 buffer->written = (char*)dest - buffer->data;
999 buffer->prev_cr = prev_cr;
1000 *dest = 0;
1003 /* note that raw buffer content is kept */
1004 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1006 encoded_buffer *src = &readerinput->buffer->encoded;
1007 encoded_buffer *dest = &readerinput->buffer->utf16;
1008 int len, dest_len;
1009 HRESULT hr;
1010 WCHAR *ptr;
1011 UINT cp;
1013 hr = get_code_page(enc, &cp);
1014 if (FAILED(hr)) return;
1016 readerinput->buffer->code_page = cp;
1017 len = readerinput_get_convlen(readerinput);
1019 TRACE("switching to cp %d\n", cp);
1021 /* just copy in this case */
1022 if (enc == XmlEncoding_UTF16)
1024 readerinput_grow(readerinput, len);
1025 memcpy(dest->data, src->data + src->cur, len);
1026 dest->written += len*sizeof(WCHAR);
1028 else
1030 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1031 readerinput_grow(readerinput, dest_len);
1032 ptr = (WCHAR*)dest->data;
1033 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1034 ptr[dest_len] = 0;
1035 dest->written += dest_len*sizeof(WCHAR);
1038 fixup_buffer_cr(dest, 0);
1041 /* shrinks parsed data a buffer begins with */
1042 static void reader_shrink(xmlreader *reader)
1044 encoded_buffer *buffer = &reader->input->buffer->utf16;
1046 /* avoid to move too often using threshold shrink length */
1047 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1049 buffer->written -= buffer->cur*sizeof(WCHAR);
1050 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1051 buffer->cur = 0;
1052 *(WCHAR*)&buffer->data[buffer->written] = 0;
1056 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1057 It won't attempt to shrink but will grow destination buffer if needed */
1058 static HRESULT reader_more(xmlreader *reader)
1060 xmlreaderinput *readerinput = reader->input;
1061 encoded_buffer *src = &readerinput->buffer->encoded;
1062 encoded_buffer *dest = &readerinput->buffer->utf16;
1063 UINT cp = readerinput->buffer->code_page;
1064 int len, dest_len, prev_len;
1065 HRESULT hr;
1066 WCHAR *ptr;
1068 /* get some raw data from stream first */
1069 hr = readerinput_growraw(readerinput);
1070 len = readerinput_get_convlen(readerinput);
1071 prev_len = dest->written / sizeof(WCHAR);
1073 /* just copy for UTF-16 case */
1074 if (cp == ~0)
1076 readerinput_grow(readerinput, len);
1077 memcpy(dest->data + dest->written, src->data + src->cur, len);
1078 dest->written += len*sizeof(WCHAR);
1080 else
1082 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1083 readerinput_grow(readerinput, dest_len);
1084 ptr = (WCHAR*)(dest->data + dest->written);
1085 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1086 ptr[dest_len] = 0;
1087 dest->written += dest_len*sizeof(WCHAR);
1088 /* get rid of processed data */
1089 readerinput_shrinkraw(readerinput, len);
1092 fixup_buffer_cr(dest, prev_len);
1093 return hr;
1096 static inline UINT reader_get_cur(xmlreader *reader)
1098 return reader->input->buffer->utf16.cur;
1101 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1103 encoded_buffer *buffer = &reader->input->buffer->utf16;
1104 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1105 if (!*ptr) reader_more(reader);
1106 return (WCHAR*)buffer->data + buffer->cur;
1109 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1111 int i=0;
1112 const WCHAR *ptr = reader_get_ptr(reader);
1113 while (str[i])
1115 if (!ptr[i])
1117 reader_more(reader);
1118 ptr = reader_get_ptr(reader);
1120 if (str[i] != ptr[i])
1121 return ptr[i] - str[i];
1122 i++;
1124 return 0;
1127 static void reader_update_position(xmlreader *reader, WCHAR ch)
1129 if (ch == '\r')
1130 reader->position.line_position = 1;
1131 else if (ch == '\n')
1133 reader->position.line_number++;
1134 reader->position.line_position = 1;
1136 else
1137 reader->position.line_position++;
1140 /* moves cursor n WCHARs forward */
1141 static void reader_skipn(xmlreader *reader, int n)
1143 encoded_buffer *buffer = &reader->input->buffer->utf16;
1144 const WCHAR *ptr;
1146 while (*(ptr = reader_get_ptr(reader)) && n--)
1148 reader_update_position(reader, *ptr);
1149 buffer->cur++;
1153 static inline BOOL is_wchar_space(WCHAR ch)
1155 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1158 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1159 static int reader_skipspaces(xmlreader *reader)
1161 const WCHAR *ptr = reader_get_ptr(reader);
1162 UINT start = reader_get_cur(reader);
1164 while (is_wchar_space(*ptr))
1166 reader_skipn(reader, 1);
1167 ptr = reader_get_ptr(reader);
1170 return reader_get_cur(reader) - start;
1173 /* [26] VersionNum ::= '1.' [0-9]+ */
1174 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1176 static const WCHAR onedotW[] = {'1','.',0};
1177 WCHAR *ptr, *ptr2;
1178 UINT start;
1180 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1182 start = reader_get_cur(reader);
1183 /* skip "1." */
1184 reader_skipn(reader, 2);
1186 ptr2 = ptr = reader_get_ptr(reader);
1187 while (*ptr >= '0' && *ptr <= '9')
1189 reader_skipn(reader, 1);
1190 ptr = reader_get_ptr(reader);
1193 if (ptr2 == ptr) return WC_E_DIGIT;
1194 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1195 TRACE("version=%s\n", debug_strval(reader, val));
1196 return S_OK;
1199 /* [25] Eq ::= S? '=' S? */
1200 static HRESULT reader_parse_eq(xmlreader *reader)
1202 static const WCHAR eqW[] = {'=',0};
1203 reader_skipspaces(reader);
1204 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1205 /* skip '=' */
1206 reader_skipn(reader, 1);
1207 reader_skipspaces(reader);
1208 return S_OK;
1211 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1212 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1214 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1215 struct reader_position position;
1216 strval val, name;
1217 HRESULT hr;
1219 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1221 position = reader->position;
1222 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1223 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1224 /* skip 'version' */
1225 reader_skipn(reader, 7);
1227 hr = reader_parse_eq(reader);
1228 if (FAILED(hr)) return hr;
1230 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1231 return WC_E_QUOTE;
1232 /* skip "'"|'"' */
1233 reader_skipn(reader, 1);
1235 hr = reader_parse_versionnum(reader, &val);
1236 if (FAILED(hr)) return hr;
1238 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1239 return WC_E_QUOTE;
1241 /* skip "'"|'"' */
1242 reader_skipn(reader, 1);
1244 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1247 /* ([A-Za-z0-9._] | '-') */
1248 static inline BOOL is_wchar_encname(WCHAR ch)
1250 return ((ch >= 'A' && ch <= 'Z') ||
1251 (ch >= 'a' && ch <= 'z') ||
1252 (ch >= '0' && ch <= '9') ||
1253 (ch == '.') || (ch == '_') ||
1254 (ch == '-'));
1257 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1258 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1260 WCHAR *start = reader_get_ptr(reader), *ptr;
1261 xml_encoding enc;
1262 int len;
1264 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1265 return WC_E_ENCNAME;
1267 val->start = reader_get_cur(reader);
1269 ptr = start;
1270 while (is_wchar_encname(*++ptr))
1273 len = ptr - start;
1274 enc = parse_encoding_name(start, len);
1275 TRACE("encoding name %s\n", debugstr_wn(start, len));
1276 val->str = start;
1277 val->len = len;
1279 if (enc == XmlEncoding_Unknown)
1280 return WC_E_ENCNAME;
1282 /* skip encoding name */
1283 reader_skipn(reader, len);
1284 return S_OK;
1287 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1288 static HRESULT reader_parse_encdecl(xmlreader *reader)
1290 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1291 struct reader_position position;
1292 strval name, val;
1293 HRESULT hr;
1295 if (!reader_skipspaces(reader)) return S_FALSE;
1297 position = reader->position;
1298 if (reader_cmp(reader, encodingW)) return S_FALSE;
1299 name.str = reader_get_ptr(reader);
1300 name.start = reader_get_cur(reader);
1301 name.len = 8;
1302 /* skip 'encoding' */
1303 reader_skipn(reader, 8);
1305 hr = reader_parse_eq(reader);
1306 if (FAILED(hr)) return hr;
1308 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1309 return WC_E_QUOTE;
1310 /* skip "'"|'"' */
1311 reader_skipn(reader, 1);
1313 hr = reader_parse_encname(reader, &val);
1314 if (FAILED(hr)) return hr;
1316 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1317 return WC_E_QUOTE;
1319 /* skip "'"|'"' */
1320 reader_skipn(reader, 1);
1322 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1325 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1326 static HRESULT reader_parse_sddecl(xmlreader *reader)
1328 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1329 static const WCHAR yesW[] = {'y','e','s',0};
1330 static const WCHAR noW[] = {'n','o',0};
1331 struct reader_position position;
1332 strval name, val;
1333 UINT start;
1334 HRESULT hr;
1336 if (!reader_skipspaces(reader)) return S_FALSE;
1338 position = reader->position;
1339 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1340 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1341 /* skip 'standalone' */
1342 reader_skipn(reader, 10);
1344 hr = reader_parse_eq(reader);
1345 if (FAILED(hr)) return hr;
1347 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1348 return WC_E_QUOTE;
1349 /* skip "'"|'"' */
1350 reader_skipn(reader, 1);
1352 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1353 return WC_E_XMLDECL;
1355 start = reader_get_cur(reader);
1356 /* skip 'yes'|'no' */
1357 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1358 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1359 TRACE("standalone=%s\n", debug_strval(reader, &val));
1361 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1362 return WC_E_QUOTE;
1363 /* skip "'"|'"' */
1364 reader_skipn(reader, 1);
1366 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1369 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1370 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1372 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1373 static const WCHAR declcloseW[] = {'?','>',0};
1374 struct reader_position position;
1375 HRESULT hr;
1377 /* check if we have "<?xml " */
1378 if (reader_cmp(reader, xmldeclW))
1379 return S_FALSE;
1381 reader_skipn(reader, 2);
1382 position = reader->position;
1383 reader_skipn(reader, 3);
1384 hr = reader_parse_versioninfo(reader);
1385 if (FAILED(hr))
1386 return hr;
1388 hr = reader_parse_encdecl(reader);
1389 if (FAILED(hr))
1390 return hr;
1392 hr = reader_parse_sddecl(reader);
1393 if (FAILED(hr))
1394 return hr;
1396 reader_skipspaces(reader);
1397 if (reader_cmp(reader, declcloseW))
1398 return WC_E_XMLDECL;
1400 /* skip '?>' */
1401 reader_skipn(reader, 2);
1403 reader->nodetype = XmlNodeType_XmlDeclaration;
1404 reader->empty_element.position = position;
1405 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1406 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1408 return S_OK;
1411 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1412 static HRESULT reader_parse_comment(xmlreader *reader)
1414 WCHAR *ptr;
1415 UINT start;
1417 if (reader->resumestate == XmlReadResumeState_Comment)
1419 start = reader->resume[XmlReadResume_Body];
1420 ptr = reader_get_ptr(reader);
1422 else
1424 /* skip '<!--' */
1425 reader_skipn(reader, 4);
1426 reader_shrink(reader);
1427 ptr = reader_get_ptr(reader);
1428 start = reader_get_cur(reader);
1429 reader->nodetype = XmlNodeType_Comment;
1430 reader->resume[XmlReadResume_Body] = start;
1431 reader->resumestate = XmlReadResumeState_Comment;
1432 reader_set_strvalue(reader, StringValue_Value, NULL);
1435 /* will exit when there's no more data, it won't attempt to
1436 read more from stream */
1437 while (*ptr)
1439 if (ptr[0] == '-')
1441 if (ptr[1] == '-')
1443 if (ptr[2] == '>')
1445 strval value;
1447 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1448 TRACE("%s\n", debug_strval(reader, &value));
1450 /* skip rest of markup '->' */
1451 reader_skipn(reader, 3);
1453 reader_set_strvalue(reader, StringValue_Value, &value);
1454 reader->resume[XmlReadResume_Body] = 0;
1455 reader->resumestate = XmlReadResumeState_Initial;
1456 return S_OK;
1458 else
1459 return WC_E_COMMENT;
1463 reader_skipn(reader, 1);
1464 ptr++;
1467 return S_OK;
1470 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1471 static inline BOOL is_char(WCHAR ch)
1473 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1474 (ch >= 0x20 && ch <= 0xd7ff) ||
1475 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1476 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1477 (ch >= 0xe000 && ch <= 0xfffd);
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
   Tests whether a character is allowed in a public identifier literal.
   Quote characters are accepted here as the production says; callers are
   expected to check for the literal's closing quote themselves. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           /* contiguous run 0x27-0x3b, it has to start at the apostrophe,
              not at '-', or '()*+, get rejected */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./0-9:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1494 static inline BOOL is_namestartchar(WCHAR ch)
1496 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1497 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1498 (ch >= 0xc0 && ch <= 0xd6) ||
1499 (ch >= 0xd8 && ch <= 0xf6) ||
1500 (ch >= 0xf8 && ch <= 0x2ff) ||
1501 (ch >= 0x370 && ch <= 0x37d) ||
1502 (ch >= 0x37f && ch <= 0x1fff) ||
1503 (ch >= 0x200c && ch <= 0x200d) ||
1504 (ch >= 0x2070 && ch <= 0x218f) ||
1505 (ch >= 0x2c00 && ch <= 0x2fef) ||
1506 (ch >= 0x3001 && ch <= 0xd7ff) ||
1507 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1508 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1509 (ch >= 0xf900 && ch <= 0xfdcf) ||
1510 (ch >= 0xfdf0 && ch <= 0xfffd);
1513 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1514 static inline BOOL is_ncnamechar(WCHAR ch)
1516 return (ch >= 'A' && ch <= 'Z') ||
1517 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1518 (ch == '-') || (ch == '.') ||
1519 (ch >= '0' && ch <= '9') ||
1520 (ch == 0xb7) ||
1521 (ch >= 0xc0 && ch <= 0xd6) ||
1522 (ch >= 0xd8 && ch <= 0xf6) ||
1523 (ch >= 0xf8 && ch <= 0x2ff) ||
1524 (ch >= 0x300 && ch <= 0x36f) ||
1525 (ch >= 0x370 && ch <= 0x37d) ||
1526 (ch >= 0x37f && ch <= 0x1fff) ||
1527 (ch >= 0x200c && ch <= 0x200d) ||
1528 (ch >= 0x203f && ch <= 0x2040) ||
1529 (ch >= 0x2070 && ch <= 0x218f) ||
1530 (ch >= 0x2c00 && ch <= 0x2fef) ||
1531 (ch >= 0x3001 && ch <= 0xd7ff) ||
1532 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1533 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1534 (ch >= 0xf900 && ch <= 0xfdcf) ||
1535 (ch >= 0xfdf0 && ch <= 0xfffd);
1538 static inline BOOL is_namechar(WCHAR ch)
1540 return (ch == ':') || is_ncnamechar(ch);
1543 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1545 /* When we're on attribute always return attribute type, container node type is kept.
1546 Note that container is not necessarily an element, and attribute doesn't mean it's
1547 an attribute in XML spec terms. */
1548 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1551 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1552 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1553 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1554 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1555 [5] Name ::= NameStartChar (NameChar)* */
1556 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1558 WCHAR *ptr;
1559 UINT start;
1561 if (reader->resume[XmlReadResume_Name])
1563 start = reader->resume[XmlReadResume_Name];
1564 ptr = reader_get_ptr(reader);
1566 else
1568 ptr = reader_get_ptr(reader);
1569 start = reader_get_cur(reader);
1570 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1573 while (is_namechar(*ptr))
1575 reader_skipn(reader, 1);
1576 ptr = reader_get_ptr(reader);
1579 if (is_reader_pending(reader))
1581 reader->resume[XmlReadResume_Name] = start;
1582 return E_PENDING;
1584 else
1585 reader->resume[XmlReadResume_Name] = 0;
1587 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1588 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1590 return S_OK;
1593 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1594 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1596 static const WCHAR xmlW[] = {'x','m','l'};
1597 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1598 strval name;
1599 WCHAR *ptr;
1600 HRESULT hr;
1601 UINT i;
1603 hr = reader_parse_name(reader, &name);
1604 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1606 /* now that we got name check for illegal content */
1607 if (strval_eq(reader, &name, &xmlval))
1608 return WC_E_LEADINGXML;
1610 /* PITarget can't be a qualified name */
1611 ptr = reader_get_strptr(reader, &name);
1612 for (i = 0; i < name.len; i++)
1613 if (ptr[i] == ':')
1614 return i ? NC_E_NAMECOLON : WC_E_PI;
1616 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1617 *target = name;
1618 return S_OK;
1621 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1622 static HRESULT reader_parse_pi(xmlreader *reader)
1624 strval target;
1625 WCHAR *ptr;
1626 UINT start;
1627 HRESULT hr;
1629 switch (reader->resumestate)
1631 case XmlReadResumeState_Initial:
1632 /* skip '<?' */
1633 reader_skipn(reader, 2);
1634 reader_shrink(reader);
1635 reader->resumestate = XmlReadResumeState_PITarget;
1636 case XmlReadResumeState_PITarget:
1637 hr = reader_parse_pitarget(reader, &target);
1638 if (FAILED(hr)) return hr;
1639 reader_set_strvalue(reader, StringValue_LocalName, &target);
1640 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1641 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1642 reader->resumestate = XmlReadResumeState_PIBody;
1643 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1644 default:
1648 start = reader->resume[XmlReadResume_Body];
1649 ptr = reader_get_ptr(reader);
1650 while (*ptr)
1652 if (ptr[0] == '?')
1654 if (ptr[1] == '>')
1656 UINT cur = reader_get_cur(reader);
1657 strval value;
1659 /* strip all leading whitespace chars */
1660 while (start < cur)
1662 ptr = reader_get_ptr2(reader, start);
1663 if (!is_wchar_space(*ptr)) break;
1664 start++;
1667 reader_init_strvalue(start, cur-start, &value);
1669 /* skip '?>' */
1670 reader_skipn(reader, 2);
1671 TRACE("%s\n", debug_strval(reader, &value));
1672 reader->nodetype = XmlNodeType_ProcessingInstruction;
1673 reader->resumestate = XmlReadResumeState_Initial;
1674 reader->resume[XmlReadResume_Body] = 0;
1675 reader_set_strvalue(reader, StringValue_Value, &value);
1676 return S_OK;
1680 reader_skipn(reader, 1);
1681 ptr = reader_get_ptr(reader);
1684 return S_OK;
1687 /* This one is used to parse significant whitespace nodes, like in Misc production */
1688 static HRESULT reader_parse_whitespace(xmlreader *reader)
1690 switch (reader->resumestate)
1692 case XmlReadResumeState_Initial:
1693 reader_shrink(reader);
1694 reader->resumestate = XmlReadResumeState_Whitespace;
1695 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1696 reader->nodetype = XmlNodeType_Whitespace;
1697 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1698 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1699 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1700 /* fallthrough */
1701 case XmlReadResumeState_Whitespace:
1703 strval value;
1704 UINT start;
1706 reader_skipspaces(reader);
1707 if (is_reader_pending(reader)) return S_OK;
1709 start = reader->resume[XmlReadResume_Body];
1710 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1711 reader_set_strvalue(reader, StringValue_Value, &value);
1712 TRACE("%s\n", debug_strval(reader, &value));
1713 reader->resumestate = XmlReadResumeState_Initial;
1715 default:
1719 return S_OK;
1722 /* [27] Misc ::= Comment | PI | S */
1723 static HRESULT reader_parse_misc(xmlreader *reader)
1725 HRESULT hr = S_FALSE;
1727 if (reader->resumestate != XmlReadResumeState_Initial)
1729 hr = reader_more(reader);
1730 if (FAILED(hr)) return hr;
1732 /* finish current node */
1733 switch (reader->resumestate)
1735 case XmlReadResumeState_PITarget:
1736 case XmlReadResumeState_PIBody:
1737 return reader_parse_pi(reader);
1738 case XmlReadResumeState_Comment:
1739 return reader_parse_comment(reader);
1740 case XmlReadResumeState_Whitespace:
1741 return reader_parse_whitespace(reader);
1742 default:
1743 ERR("unknown resume state %d\n", reader->resumestate);
1747 while (1)
1749 const WCHAR *cur = reader_get_ptr(reader);
1751 if (is_wchar_space(*cur))
1752 hr = reader_parse_whitespace(reader);
1753 else if (!reader_cmp(reader, commentW))
1754 hr = reader_parse_comment(reader);
1755 else if (!reader_cmp(reader, piW))
1756 hr = reader_parse_pi(reader);
1757 else
1758 break;
1760 if (hr != S_FALSE) return hr;
1763 return hr;
1766 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1767 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1769 WCHAR *cur = reader_get_ptr(reader), quote;
1770 UINT start;
1772 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1774 quote = *cur;
1775 reader_skipn(reader, 1);
1777 cur = reader_get_ptr(reader);
1778 start = reader_get_cur(reader);
1779 while (is_char(*cur) && *cur != quote)
1781 reader_skipn(reader, 1);
1782 cur = reader_get_ptr(reader);
1784 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1785 if (*cur == quote) reader_skipn(reader, 1);
1787 TRACE("%s\n", debug_strval(reader, literal));
1788 return S_OK;
1791 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1792 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1793 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1795 WCHAR *cur = reader_get_ptr(reader), quote;
1796 UINT start;
1798 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1800 quote = *cur;
1801 reader_skipn(reader, 1);
1803 start = reader_get_cur(reader);
1804 cur = reader_get_ptr(reader);
1805 while (is_pubchar(*cur) && *cur != quote)
1807 reader_skipn(reader, 1);
1808 cur = reader_get_ptr(reader);
1810 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1811 if (*cur == quote) reader_skipn(reader, 1);
1813 TRACE("%s\n", debug_strval(reader, literal));
1814 return S_OK;
1817 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1818 static HRESULT reader_parse_externalid(xmlreader *reader)
1820 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1821 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1822 struct reader_position position = reader->position;
1823 strval name, sys;
1824 HRESULT hr;
1825 int cnt;
1827 if (!reader_cmp(reader, publicW)) {
1828 strval pub;
1830 /* public id */
1831 reader_skipn(reader, 6);
1832 cnt = reader_skipspaces(reader);
1833 if (!cnt) return WC_E_WHITESPACE;
1835 hr = reader_parse_pub_literal(reader, &pub);
1836 if (FAILED(hr)) return hr;
1838 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1839 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1840 if (FAILED(hr)) return hr;
1842 cnt = reader_skipspaces(reader);
1843 if (!cnt) return S_OK;
1845 /* optional system id */
1846 hr = reader_parse_sys_literal(reader, &sys);
1847 if (FAILED(hr)) return S_OK;
1849 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1850 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1851 if (FAILED(hr)) return hr;
1853 return S_OK;
1854 } else if (!reader_cmp(reader, systemW)) {
1855 /* system id */
1856 reader_skipn(reader, 6);
1857 cnt = reader_skipspaces(reader);
1858 if (!cnt) return WC_E_WHITESPACE;
1860 hr = reader_parse_sys_literal(reader, &sys);
1861 if (FAILED(hr)) return hr;
1863 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1864 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1867 return S_FALSE;
1870 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1871 static HRESULT reader_parse_dtd(xmlreader *reader)
1873 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1874 strval name;
1875 WCHAR *cur;
1876 HRESULT hr;
1878 /* check if we have "<!DOCTYPE" */
1879 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1880 reader_shrink(reader);
1882 /* DTD processing is not allowed by default */
1883 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1885 reader_skipn(reader, 9);
1886 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1888 /* name */
1889 hr = reader_parse_name(reader, &name);
1890 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1892 reader_skipspaces(reader);
1894 hr = reader_parse_externalid(reader);
1895 if (FAILED(hr)) return hr;
1897 reader_skipspaces(reader);
1899 cur = reader_get_ptr(reader);
1900 if (*cur != '>')
1902 FIXME("internal subset parsing not implemented\n");
1903 return E_NOTIMPL;
1906 /* skip '>' */
1907 reader_skipn(reader, 1);
1909 reader->nodetype = XmlNodeType_DocumentType;
1910 reader_set_strvalue(reader, StringValue_LocalName, &name);
1911 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1913 return S_OK;
1916 /* [11 NS] LocalPart ::= NCName */
1917 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1919 WCHAR *ptr;
1920 UINT start;
1922 if (reader->resume[XmlReadResume_Local])
1924 start = reader->resume[XmlReadResume_Local];
1925 ptr = reader_get_ptr(reader);
1927 else
1929 ptr = reader_get_ptr(reader);
1930 start = reader_get_cur(reader);
1933 while (is_ncnamechar(*ptr))
1935 reader_skipn(reader, 1);
1936 ptr = reader_get_ptr(reader);
1939 if (check_for_separator && *ptr == ':')
1940 return NC_E_QNAMECOLON;
1942 if (is_reader_pending(reader))
1944 reader->resume[XmlReadResume_Local] = start;
1945 return E_PENDING;
1947 else
1948 reader->resume[XmlReadResume_Local] = 0;
1950 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1952 return S_OK;
1955 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1956 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1957 [9 NS] UnprefixedName ::= LocalPart
1958 [10 NS] Prefix ::= NCName */
1959 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1961 WCHAR *ptr;
1962 UINT start;
1963 HRESULT hr;
1965 if (reader->resume[XmlReadResume_Name])
1967 start = reader->resume[XmlReadResume_Name];
1968 ptr = reader_get_ptr(reader);
1970 else
1972 ptr = reader_get_ptr(reader);
1973 start = reader_get_cur(reader);
1974 reader->resume[XmlReadResume_Name] = start;
1975 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1978 if (reader->resume[XmlReadResume_Local])
1980 hr = reader_parse_local(reader, local, FALSE);
1981 if (FAILED(hr)) return hr;
1983 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1984 local->start - reader->resume[XmlReadResume_Name] - 1,
1985 prefix);
1987 else
1989 /* skip prefix part */
1990 while (is_ncnamechar(*ptr))
1992 reader_skipn(reader, 1);
1993 ptr = reader_get_ptr(reader);
1996 if (is_reader_pending(reader)) return E_PENDING;
1998 /* got a qualified name */
1999 if (*ptr == ':')
2001 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
2003 /* skip ':' */
2004 reader_skipn(reader, 1);
2005 hr = reader_parse_local(reader, local, TRUE);
2006 if (FAILED(hr)) return hr;
2008 else
2010 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2011 reader_init_strvalue(0, 0, prefix);
2015 if (prefix->len)
2016 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2017 else
2018 TRACE("ncname %s\n", debug_strval(reader, local));
2020 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2021 /* count ':' too */
2022 (prefix->len ? prefix->len + 1 : 0) + local->len,
2023 qname);
2025 reader->resume[XmlReadResume_Name] = 0;
2026 reader->resume[XmlReadResume_Local] = 0;
2028 return S_OK;
2031 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2033 static const WCHAR entltW[] = {'l','t'};
2034 static const WCHAR entgtW[] = {'g','t'};
2035 static const WCHAR entampW[] = {'a','m','p'};
2036 static const WCHAR entaposW[] = {'a','p','o','s'};
2037 static const WCHAR entquotW[] = {'q','u','o','t'};
2038 static const strval lt = { (WCHAR*)entltW, 2 };
2039 static const strval gt = { (WCHAR*)entgtW, 2 };
2040 static const strval amp = { (WCHAR*)entampW, 3 };
2041 static const strval apos = { (WCHAR*)entaposW, 4 };
2042 static const strval quot = { (WCHAR*)entquotW, 4 };
2043 WCHAR *str = reader_get_strptr(reader, name);
2045 switch (*str)
2047 case 'l':
2048 if (strval_eq(reader, name, &lt)) return '<';
2049 break;
2050 case 'g':
2051 if (strval_eq(reader, name, &gt)) return '>';
2052 break;
2053 case 'a':
2054 if (strval_eq(reader, name, &amp))
2055 return '&';
2056 else if (strval_eq(reader, name, &apos))
2057 return '\'';
2058 break;
2059 case 'q':
2060 if (strval_eq(reader, name, &quot)) return '\"';
2061 break;
2062 default:
2066 return 0;
2069 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2070 [67] Reference ::= EntityRef | CharRef
2071 [68] EntityRef ::= '&' Name ';' */
2072 static HRESULT reader_parse_reference(xmlreader *reader)
2074 encoded_buffer *buffer = &reader->input->buffer->utf16;
2075 WCHAR *start = reader_get_ptr(reader), *ptr;
2076 UINT cur = reader_get_cur(reader);
2077 WCHAR ch = 0;
2078 int len;
2080 /* skip '&' */
2081 reader_skipn(reader, 1);
2082 ptr = reader_get_ptr(reader);
2084 if (*ptr == '#')
2086 reader_skipn(reader, 1);
2087 ptr = reader_get_ptr(reader);
2089 /* hex char or decimal */
2090 if (*ptr == 'x')
2092 reader_skipn(reader, 1);
2093 ptr = reader_get_ptr(reader);
2095 while (*ptr != ';')
2097 if ((*ptr >= '0' && *ptr <= '9'))
2098 ch = ch*16 + *ptr - '0';
2099 else if ((*ptr >= 'a' && *ptr <= 'f'))
2100 ch = ch*16 + *ptr - 'a' + 10;
2101 else if ((*ptr >= 'A' && *ptr <= 'F'))
2102 ch = ch*16 + *ptr - 'A' + 10;
2103 else
2104 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2105 reader_skipn(reader, 1);
2106 ptr = reader_get_ptr(reader);
2109 else
2111 while (*ptr != ';')
2113 if ((*ptr >= '0' && *ptr <= '9'))
2115 ch = ch*10 + *ptr - '0';
2116 reader_skipn(reader, 1);
2117 ptr = reader_get_ptr(reader);
2119 else
2120 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2124 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2126 /* normalize */
2127 if (is_wchar_space(ch)) ch = ' ';
2129 ptr = reader_get_ptr(reader);
2130 start = reader_get_ptr2(reader, cur);
2131 len = buffer->written - ((char *)ptr - buffer->data);
2132 memmove(start + 1, ptr + 1, len);
2134 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2135 buffer->cur = cur + 1;
2137 *start = ch;
2139 else
2141 strval name;
2142 HRESULT hr;
2144 hr = reader_parse_name(reader, &name);
2145 if (FAILED(hr)) return hr;
2147 ptr = reader_get_ptr(reader);
2148 if (*ptr != ';') return WC_E_SEMICOLON;
2150 /* predefined entities resolve to a single character */
2151 ch = get_predefined_entity(reader, &name);
2152 if (ch)
2154 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2155 memmove(start+1, ptr+1, len);
2156 buffer->cur = cur + 1;
2157 buffer->written -= (ptr - start) * sizeof(WCHAR);
2159 *start = ch;
2161 else
2163 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2164 return WC_E_UNDECLAREDENTITY;
2169 return S_OK;
2172 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2173 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2175 WCHAR *ptr, quote;
2176 UINT start;
2178 ptr = reader_get_ptr(reader);
2180 /* skip opening quote */
2181 quote = *ptr;
2182 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2183 reader_skipn(reader, 1);
2185 ptr = reader_get_ptr(reader);
2186 start = reader_get_cur(reader);
2187 while (*ptr)
2189 if (*ptr == '<') return WC_E_LESSTHAN;
2191 if (*ptr == quote)
2193 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2194 /* skip closing quote */
2195 reader_skipn(reader, 1);
2196 return S_OK;
2199 if (*ptr == '&')
2201 HRESULT hr = reader_parse_reference(reader);
2202 if (FAILED(hr)) return hr;
2204 else
2206 /* replace all whitespace chars with ' ' */
2207 if (is_wchar_space(*ptr)) *ptr = ' ';
2208 reader_skipn(reader, 1);
2210 ptr = reader_get_ptr(reader);
2213 return WC_E_QUOTE;
2216 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2217 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2218 [3 NS] DefaultAttName ::= 'xmlns'
2219 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2220 static HRESULT reader_parse_attribute(xmlreader *reader)
2222 struct reader_position position = reader->position;
2223 strval prefix, local, qname, value;
2224 enum attribute_flags flags = 0;
2225 HRESULT hr;
2227 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2228 if (FAILED(hr)) return hr;
2230 if (strval_eq(reader, &prefix, &strval_xmlns))
2231 flags |= ATTRIBUTE_NS_DEFINITION;
2233 if (strval_eq(reader, &qname, &strval_xmlns))
2234 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2236 hr = reader_parse_eq(reader);
2237 if (FAILED(hr)) return hr;
2239 hr = reader_parse_attvalue(reader, &value);
2240 if (FAILED(hr)) return hr;
2242 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2243 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2245 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2246 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2249 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2250 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2251 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2253 struct reader_position position = reader->position;
2254 HRESULT hr;
2256 hr = reader_parse_qname(reader, prefix, local, qname);
2257 if (FAILED(hr)) return hr;
2259 for (;;)
2261 static const WCHAR endW[] = {'/','>',0};
2263 reader_skipspaces(reader);
2265 /* empty element */
2266 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2268 struct element *element = &reader->empty_element;
2270 /* skip '/>' */
2271 reader_skipn(reader, 2);
2273 reader_free_strvalued(reader, &element->qname);
2274 reader_free_strvalued(reader, &element->localname);
2276 element->prefix = *prefix;
2277 reader_strvaldup(reader, qname, &element->qname);
2278 reader_strvaldup(reader, local, &element->localname);
2279 element->position = position;
2280 reader_mark_ns_nodes(reader, element);
2281 return S_OK;
2284 /* got a start tag */
2285 if (!reader_cmp(reader, gtW))
2287 /* skip '>' */
2288 reader_skipn(reader, 1);
2289 return reader_push_element(reader, prefix, local, qname, &position);
2292 hr = reader_parse_attribute(reader);
2293 if (FAILED(hr)) return hr;
2296 return S_OK;
2299 /* [39] element ::= EmptyElemTag | STag content ETag */
2300 static HRESULT reader_parse_element(xmlreader *reader)
2302 HRESULT hr;
2304 switch (reader->resumestate)
2306 case XmlReadResumeState_Initial:
2307 /* check if we are really on element */
2308 if (reader_cmp(reader, ltW)) return S_FALSE;
2310 /* skip '<' */
2311 reader_skipn(reader, 1);
2313 reader_shrink(reader);
2314 reader->resumestate = XmlReadResumeState_STag;
2315 case XmlReadResumeState_STag:
2317 strval qname, prefix, local;
2319 /* this handles empty elements too */
2320 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2321 if (FAILED(hr)) return hr;
2323 /* FIXME: need to check for defined namespace to reject invalid prefix */
2325 /* if we got empty element and stack is empty go straight to Misc */
2326 if (reader->is_empty_element && list_empty(&reader->elements))
2327 reader->instate = XmlReadInState_MiscEnd;
2328 else
2329 reader->instate = XmlReadInState_Content;
2331 reader->nodetype = XmlNodeType_Element;
2332 reader->resumestate = XmlReadResumeState_Initial;
2333 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2334 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2335 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2336 break;
2338 default:
2339 hr = E_FAIL;
2342 return hr;
2345 /* [13 NS] ETag ::= '</' QName S? '>' */
2346 static HRESULT reader_parse_endtag(xmlreader *reader)
2348 struct reader_position position;
2349 strval prefix, local, qname;
2350 struct element *element;
2351 HRESULT hr;
2353 /* skip '</' */
2354 reader_skipn(reader, 2);
2356 position = reader->position;
2357 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2358 if (FAILED(hr)) return hr;
2360 reader_skipspaces(reader);
2362 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2364 /* skip '>' */
2365 reader_skipn(reader, 1);
2367 /* Element stack should never be empty at this point, cause we shouldn't get to
2368 content parsing if it's empty. */
2369 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2370 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2372 /* update position stored for start tag, we won't be using it */
2373 element->position = position;
2375 reader->nodetype = XmlNodeType_EndElement;
2376 reader->is_empty_element = FALSE;
2377 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2379 return S_OK;
2382 /* [18] CDSect ::= CDStart CData CDEnd
2383 [19] CDStart ::= '<![CDATA['
2384 [20] CData ::= (Char* - (Char* ']]>' Char*))
2385 [21] CDEnd ::= ']]>' */
2386 static HRESULT reader_parse_cdata(xmlreader *reader)
2388 WCHAR *ptr;
2389 UINT start;
2391 if (reader->resumestate == XmlReadResumeState_CDATA)
2393 start = reader->resume[XmlReadResume_Body];
2394 ptr = reader_get_ptr(reader);
2396 else
2398 /* skip markup '<![CDATA[' */
2399 reader_skipn(reader, 9);
2400 reader_shrink(reader);
2401 ptr = reader_get_ptr(reader);
2402 start = reader_get_cur(reader);
2403 reader->nodetype = XmlNodeType_CDATA;
2404 reader->resume[XmlReadResume_Body] = start;
2405 reader->resumestate = XmlReadResumeState_CDATA;
2406 reader_set_strvalue(reader, StringValue_Value, NULL);
2409 while (*ptr)
2411 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2413 strval value;
2415 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2417 /* skip ']]>' */
2418 reader_skipn(reader, 3);
2419 TRACE("%s\n", debug_strval(reader, &value));
2421 reader_set_strvalue(reader, StringValue_Value, &value);
2422 reader->resume[XmlReadResume_Body] = 0;
2423 reader->resumestate = XmlReadResumeState_Initial;
2424 return S_OK;
2426 else
2428 reader_skipn(reader, 1);
2429 ptr++;
2433 return S_OK;
2436 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2437 static HRESULT reader_parse_chardata(xmlreader *reader)
2439 struct reader_position position;
2440 WCHAR *ptr;
2441 UINT start;
2443 if (reader->resumestate == XmlReadResumeState_CharData)
2445 start = reader->resume[XmlReadResume_Body];
2446 ptr = reader_get_ptr(reader);
2448 else
2450 reader_shrink(reader);
2451 ptr = reader_get_ptr(reader);
2452 start = reader_get_cur(reader);
2453 /* There's no text */
2454 if (!*ptr || *ptr == '<') return S_OK;
2455 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2456 reader->resume[XmlReadResume_Body] = start;
2457 reader->resumestate = XmlReadResumeState_CharData;
2458 reader_set_strvalue(reader, StringValue_Value, NULL);
2461 position = reader->position;
2462 while (*ptr)
2464 static const WCHAR ampW[] = {'&',0};
2466 /* CDATA closing sequence ']]>' is not allowed */
2467 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2468 return WC_E_CDSECTEND;
2470 /* Found next markup part */
2471 if (ptr[0] == '<')
2473 strval value;
2475 reader->empty_element.position = position;
2476 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2477 reader_set_strvalue(reader, StringValue_Value, &value);
2478 reader->resume[XmlReadResume_Body] = 0;
2479 reader->resumestate = XmlReadResumeState_Initial;
2480 return S_OK;
2483 /* this covers a case when text has leading whitespace chars */
2484 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2486 if (!reader_cmp(reader, ampW))
2487 reader_parse_reference(reader);
2488 else
2489 reader_skipn(reader, 1);
2491 ptr = reader_get_ptr(reader);
2494 return S_OK;
2497 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2498 static HRESULT reader_parse_content(xmlreader *reader)
2500 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2501 static const WCHAR etagW[] = {'<','/',0};
2503 if (reader->resumestate != XmlReadResumeState_Initial)
2505 switch (reader->resumestate)
2507 case XmlReadResumeState_CDATA:
2508 return reader_parse_cdata(reader);
2509 case XmlReadResumeState_Comment:
2510 return reader_parse_comment(reader);
2511 case XmlReadResumeState_PIBody:
2512 case XmlReadResumeState_PITarget:
2513 return reader_parse_pi(reader);
2514 case XmlReadResumeState_CharData:
2515 return reader_parse_chardata(reader);
2516 default:
2517 ERR("unknown resume state %d\n", reader->resumestate);
2521 reader_shrink(reader);
2523 /* handle end tag here, it indicates end of content as well */
2524 if (!reader_cmp(reader, etagW))
2525 return reader_parse_endtag(reader);
2527 if (!reader_cmp(reader, commentW))
2528 return reader_parse_comment(reader);
2530 if (!reader_cmp(reader, piW))
2531 return reader_parse_pi(reader);
2533 if (!reader_cmp(reader, cdstartW))
2534 return reader_parse_cdata(reader);
2536 if (!reader_cmp(reader, ltW))
2537 return reader_parse_element(reader);
2539 /* what's left must be CharData */
2540 return reader_parse_chardata(reader);
2543 static HRESULT reader_parse_nextnode(xmlreader *reader)
2545 XmlNodeType nodetype = reader_get_nodetype(reader);
2546 HRESULT hr;
2548 if (!is_reader_pending(reader))
2550 reader->chunk_read_off = 0;
2551 reader_clear_attrs(reader);
2554 /* When moving from EndElement or empty element, pop its own namespace definitions */
2555 switch (nodetype)
2557 case XmlNodeType_Attribute:
2558 reader_dec_depth(reader);
2559 /* fallthrough */
2560 case XmlNodeType_Element:
2561 if (reader->is_empty_element)
2562 reader_pop_ns_nodes(reader, &reader->empty_element);
2563 else if (FAILED(hr = reader_inc_depth(reader)))
2564 return hr;
2565 break;
2566 case XmlNodeType_EndElement:
2567 reader_pop_element(reader);
2568 reader_dec_depth(reader);
2569 break;
2570 default:
2574 for (;;)
2576 switch (reader->instate)
2578 /* if it's a first call for a new input we need to detect stream encoding */
2579 case XmlReadInState_Initial:
2581 xml_encoding enc;
2583 hr = readerinput_growraw(reader->input);
2584 if (FAILED(hr)) return hr;
2586 reader->position.line_number = 1;
2587 reader->position.line_position = 1;
2589 /* try to detect encoding by BOM or data and set input code page */
2590 hr = readerinput_detectencoding(reader->input, &enc);
2591 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2592 debugstr_w(xml_encoding_map[enc].name), hr);
2593 if (FAILED(hr)) return hr;
2595 /* always switch first time cause we have to put something in */
2596 readerinput_switchencoding(reader->input, enc);
2598 /* parse xml declaration */
2599 hr = reader_parse_xmldecl(reader);
2600 if (FAILED(hr)) return hr;
2602 readerinput_shrinkraw(reader->input, -1);
2603 reader->instate = XmlReadInState_Misc_DTD;
2604 if (hr == S_OK) return hr;
2606 break;
2607 case XmlReadInState_Misc_DTD:
2608 hr = reader_parse_misc(reader);
2609 if (FAILED(hr)) return hr;
2611 if (hr == S_FALSE)
2612 reader->instate = XmlReadInState_DTD;
2613 else
2614 return hr;
2615 break;
2616 case XmlReadInState_DTD:
2617 hr = reader_parse_dtd(reader);
2618 if (FAILED(hr)) return hr;
2620 if (hr == S_OK)
2622 reader->instate = XmlReadInState_DTD_Misc;
2623 return hr;
2625 else
2626 reader->instate = XmlReadInState_Element;
2627 break;
2628 case XmlReadInState_DTD_Misc:
2629 hr = reader_parse_misc(reader);
2630 if (FAILED(hr)) return hr;
2632 if (hr == S_FALSE)
2633 reader->instate = XmlReadInState_Element;
2634 else
2635 return hr;
2636 break;
2637 case XmlReadInState_Element:
2638 return reader_parse_element(reader);
2639 case XmlReadInState_Content:
2640 return reader_parse_content(reader);
2641 case XmlReadInState_MiscEnd:
2642 hr = reader_parse_misc(reader);
2643 if (hr != S_FALSE) return hr;
2645 if (*reader_get_ptr(reader))
2647 WARN("found garbage in the end of XML\n");
2648 return WC_E_SYNTAX;
2651 reader->instate = XmlReadInState_Eof;
2652 reader->state = XmlReadState_EndOfFile;
2653 reader->nodetype = XmlNodeType_None;
2654 return hr;
2655 case XmlReadInState_Eof:
2656 return S_FALSE;
2657 default:
2658 FIXME("internal state %d not handled\n", reader->instate);
2659 return E_NOTIMPL;
2663 return E_NOTIMPL;
2666 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2668 xmlreader *This = impl_from_IXmlReader(iface);
2670 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2672 if (IsEqualGUID(riid, &IID_IUnknown) ||
2673 IsEqualGUID(riid, &IID_IXmlReader))
2675 *ppvObject = iface;
2677 else
2679 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2680 *ppvObject = NULL;
2681 return E_NOINTERFACE;
2684 IXmlReader_AddRef(iface);
2686 return S_OK;
2689 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2691 xmlreader *This = impl_from_IXmlReader(iface);
2692 ULONG ref = InterlockedIncrement(&This->ref);
2693 TRACE("(%p)->(%d)\n", This, ref);
2694 return ref;
2697 static void reader_clear_ns(xmlreader *reader)
2699 struct ns *ns, *ns2;
2701 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2702 list_remove(&ns->entry);
2703 reader_free_strvalued(reader, &ns->prefix);
2704 reader_free_strvalued(reader, &ns->uri);
2705 reader_free(reader, ns);
2708 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2709 list_remove(&ns->entry);
2710 reader_free_strvalued(reader, &ns->uri);
2711 reader_free(reader, ns);
2715 static void reader_reset_parser(xmlreader *reader)
2717 reader->position.line_number = 0;
2718 reader->position.line_position = 0;
2720 reader_clear_elements(reader);
2721 reader_clear_attrs(reader);
2722 reader_clear_ns(reader);
2723 reader_free_strvalues(reader);
2725 reader->depth = 0;
2726 reader->nodetype = XmlNodeType_None;
2727 reader->resumestate = XmlReadResumeState_Initial;
2728 memset(reader->resume, 0, sizeof(reader->resume));
2729 reader->is_empty_element = FALSE;
2732 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2734 xmlreader *This = impl_from_IXmlReader(iface);
2735 LONG ref = InterlockedDecrement(&This->ref);
2737 TRACE("(%p)->(%d)\n", This, ref);
2739 if (ref == 0)
2741 IMalloc *imalloc = This->imalloc;
2742 reader_reset_parser(This);
2743 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2744 if (This->resolver) IXmlResolver_Release(This->resolver);
2745 if (This->mlang) IUnknown_Release(This->mlang);
2746 reader_free(This, This);
2747 if (imalloc) IMalloc_Release(imalloc);
2750 return ref;
2753 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2755 xmlreader *This = impl_from_IXmlReader(iface);
2756 IXmlReaderInput *readerinput;
2757 HRESULT hr;
2759 TRACE("(%p)->(%p)\n", This, input);
2761 if (This->input)
2763 readerinput_release_stream(This->input);
2764 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2765 This->input = NULL;
2768 reader_reset_parser(This);
2770 /* just reset current input */
2771 if (!input)
2773 This->state = XmlReadState_Initial;
2774 return S_OK;
2777 /* now try IXmlReaderInput, ISequentialStream, IStream */
2778 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2779 if (hr == S_OK)
2781 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2782 This->input = impl_from_IXmlReaderInput(readerinput);
2783 else
2785 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2786 readerinput, readerinput->lpVtbl);
2787 IUnknown_Release(readerinput);
2788 return E_FAIL;
2793 if (hr != S_OK || !readerinput)
2795 /* create IXmlReaderInput basing on supplied interface */
2796 hr = CreateXmlReaderInputWithEncodingName(input,
2797 This->imalloc, NULL, FALSE, NULL, &readerinput);
2798 if (hr != S_OK) return hr;
2799 This->input = impl_from_IXmlReaderInput(readerinput);
2802 /* set stream for supplied IXmlReaderInput */
2803 hr = readerinput_query_for_stream(This->input);
2804 if (hr == S_OK)
2806 This->state = XmlReadState_Initial;
2807 This->instate = XmlReadInState_Initial;
2809 return hr;
2812 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2814 xmlreader *This = impl_from_IXmlReader(iface);
2816 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2818 if (!value) return E_INVALIDARG;
2820 switch (property)
2822 case XmlReaderProperty_MultiLanguage:
2823 *value = (LONG_PTR)This->mlang;
2824 if (This->mlang)
2825 IUnknown_AddRef(This->mlang);
2826 break;
2827 case XmlReaderProperty_XmlResolver:
2828 *value = (LONG_PTR)This->resolver;
2829 if (This->resolver)
2830 IXmlResolver_AddRef(This->resolver);
2831 break;
2832 case XmlReaderProperty_DtdProcessing:
2833 *value = This->dtdmode;
2834 break;
2835 case XmlReaderProperty_ReadState:
2836 *value = This->state;
2837 break;
2838 case XmlReaderProperty_MaxElementDepth:
2839 *value = This->max_depth;
2840 break;
2841 default:
2842 FIXME("Unimplemented property (%u)\n", property);
2843 return E_NOTIMPL;
2846 return S_OK;
2849 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2851 xmlreader *This = impl_from_IXmlReader(iface);
2853 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2855 switch (property)
2857 case XmlReaderProperty_MultiLanguage:
2858 if (This->mlang)
2859 IUnknown_Release(This->mlang);
2860 This->mlang = (IUnknown*)value;
2861 if (This->mlang)
2862 IUnknown_AddRef(This->mlang);
2863 if (This->mlang)
2864 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2865 break;
2866 case XmlReaderProperty_XmlResolver:
2867 if (This->resolver)
2868 IXmlResolver_Release(This->resolver);
2869 This->resolver = (IXmlResolver*)value;
2870 if (This->resolver)
2871 IXmlResolver_AddRef(This->resolver);
2872 break;
2873 case XmlReaderProperty_DtdProcessing:
2874 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2875 This->dtdmode = value;
2876 break;
2877 case XmlReaderProperty_MaxElementDepth:
2878 This->max_depth = value;
2879 break;
2880 default:
2881 FIXME("Unimplemented property (%u)\n", property);
2882 return E_NOTIMPL;
2885 return S_OK;
2888 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2890 xmlreader *This = impl_from_IXmlReader(iface);
2891 XmlNodeType oldtype = This->nodetype;
2892 XmlNodeType type;
2893 HRESULT hr;
2895 TRACE("(%p)->(%p)\n", This, nodetype);
2897 if (!nodetype)
2898 nodetype = &type;
2900 switch (This->state)
2902 case XmlReadState_Closed:
2903 hr = S_FALSE;
2904 break;
2905 case XmlReadState_Error:
2906 hr = This->error;
2907 break;
2908 default:
2909 hr = reader_parse_nextnode(This);
2910 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2911 This->state = XmlReadState_Interactive;
2913 if (FAILED(hr))
2915 This->state = XmlReadState_Error;
2916 This->nodetype = XmlNodeType_None;
2917 This->depth = 0;
2918 This->error = hr;
2922 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2923 *nodetype = This->nodetype;
2925 return hr;
2928 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2930 xmlreader *This = impl_from_IXmlReader(iface);
2932 TRACE("(%p)->(%p)\n", This, node_type);
2934 if (!node_type)
2935 return E_INVALIDARG;
2937 *node_type = reader_get_nodetype(This);
2938 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2941 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2943 if (!reader->attr_count)
2944 return S_FALSE;
2946 if (!reader->attr)
2947 reader_inc_depth(reader);
2949 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2950 reader->chunk_read_off = 0;
2951 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2952 reader_set_strvalue(reader, StringValue_QualifiedName, &reader->attr->qname);
2953 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2955 return S_OK;
2958 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2960 xmlreader *This = impl_from_IXmlReader(iface);
2962 TRACE("(%p)\n", This);
2964 return reader_move_to_first_attribute(This);
2967 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2969 xmlreader *This = impl_from_IXmlReader(iface);
2970 const struct list *next;
2972 TRACE("(%p)\n", This);
2974 if (!This->attr_count) return S_FALSE;
2976 if (!This->attr)
2977 return reader_move_to_first_attribute(This);
2979 next = list_next(&This->attrs, &This->attr->entry);
2980 if (next)
2982 This->attr = LIST_ENTRY(next, struct attribute, entry);
2983 This->chunk_read_off = 0;
2984 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2985 reader_set_strvalue(This, StringValue_QualifiedName, &This->attr->qname);
2986 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2989 return next ? S_OK : S_FALSE;
2992 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2993 LPCWSTR local_name,
2994 LPCWSTR namespaceUri)
2996 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2997 return E_NOTIMPL;
3000 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3002 xmlreader *This = impl_from_IXmlReader(iface);
3004 TRACE("(%p)\n", This);
3006 if (!This->attr_count) return S_FALSE;
3008 if (This->attr)
3009 reader_dec_depth(This);
3011 This->attr = NULL;
3013 /* FIXME: support other node types with 'attributes' like DTD */
3014 if (This->is_empty_element) {
3015 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3016 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3018 else {
3019 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3020 if (element) {
3021 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3022 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3025 This->chunk_read_off = 0;
3026 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3028 return S_OK;
3031 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3033 xmlreader *This = impl_from_IXmlReader(iface);
3034 struct attribute *attribute = This->attr;
3035 struct element *element;
3036 UINT length;
3038 TRACE("(%p)->(%p %p)\n", This, name, len);
3040 if (!len)
3041 len = &length;
3043 switch (reader_get_nodetype(This))
3045 case XmlNodeType_Text:
3046 case XmlNodeType_CDATA:
3047 case XmlNodeType_Comment:
3048 case XmlNodeType_Whitespace:
3049 *name = emptyW;
3050 *len = 0;
3051 break;
3052 case XmlNodeType_Element:
3053 case XmlNodeType_EndElement:
3054 element = reader_get_element(This);
3055 if (element->prefix.len)
3057 *name = element->qname.str;
3058 *len = element->qname.len;
3060 else
3062 *name = element->localname.str;
3063 *len = element->localname.len;
3065 break;
3066 case XmlNodeType_Attribute:
3067 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3069 *name = xmlnsW;
3070 *len = 5;
3071 } else if (attribute->prefix.len)
3073 *name = This->strvalues[StringValue_QualifiedName].str;
3074 *len = This->strvalues[StringValue_QualifiedName].len;
3076 else
3078 *name = attribute->localname.str;
3079 *len = attribute->localname.len;
3081 break;
3082 default:
3083 *name = This->strvalues[StringValue_QualifiedName].str;
3084 *len = This->strvalues[StringValue_QualifiedName].len;
3085 break;
3088 return S_OK;
3091 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
3093 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
3094 struct ns *ns;
3096 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
3097 if (strval_eq(reader, prefix, &ns->prefix))
3098 return ns;
3101 return NULL;
3104 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3106 if (list_empty(&reader->nsdef))
3107 return NULL;
3109 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3112 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3114 xmlreader *This = impl_from_IXmlReader(iface);
3115 const strval *prefix = &This->strvalues[StringValue_Prefix];
3116 XmlNodeType nodetype;
3117 struct ns *ns;
3118 UINT length;
3120 TRACE("(%p %p %p)\n", iface, uri, len);
3122 if (!len)
3123 len = &length;
3125 *uri = NULL;
3126 *len = 0;
3128 switch ((nodetype = reader_get_nodetype(This)))
3130 case XmlNodeType_Attribute:
3132 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3133 '2','0','0','0','/','x','m','l','n','s','/',0};
3134 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3135 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3137 /* check for reserved prefixes first */
3138 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, &This->attr->localname, &strval_xmlns)) ||
3139 strval_eq(This, prefix, &strval_xmlns))
3141 *uri = xmlns_uriW;
3142 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
3144 else if (strval_eq(This, prefix, &strval_xml)) {
3145 *uri = xml_uriW;
3146 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
3149 if (!*uri) {
3150 ns = reader_lookup_ns(This, prefix);
3151 if (ns) {
3152 *uri = ns->uri.str;
3153 *len = ns->uri.len;
3155 else {
3156 *uri = emptyW;
3157 *len = 0;
3161 break;
3162 case XmlNodeType_Element:
3163 case XmlNodeType_EndElement:
3165 ns = reader_lookup_ns(This, prefix);
3167 /* pick top default ns if any */
3168 if (!ns)
3169 ns = reader_lookup_nsdef(This);
3171 if (ns) {
3172 *uri = ns->uri.str;
3173 *len = ns->uri.len;
3175 else {
3176 *uri = emptyW;
3177 *len = 0;
3180 break;
3181 case XmlNodeType_Text:
3182 case XmlNodeType_CDATA:
3183 case XmlNodeType_ProcessingInstruction:
3184 case XmlNodeType_Comment:
3185 case XmlNodeType_Whitespace:
3186 case XmlNodeType_XmlDeclaration:
3187 *uri = emptyW;
3188 *len = 0;
3189 break;
3190 default:
3191 FIXME("Unhandled node type %d\n", nodetype);
3192 return E_NOTIMPL;
3195 return S_OK;
3198 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3200 xmlreader *This = impl_from_IXmlReader(iface);
3201 struct element *element;
3202 UINT length;
3204 TRACE("(%p)->(%p %p)\n", This, name, len);
3206 if (!len)
3207 len = &length;
3209 switch (reader_get_nodetype(This))
3211 case XmlNodeType_Text:
3212 case XmlNodeType_CDATA:
3213 case XmlNodeType_Comment:
3214 case XmlNodeType_Whitespace:
3215 *name = emptyW;
3216 *len = 0;
3217 break;
3218 case XmlNodeType_Element:
3219 case XmlNodeType_EndElement:
3220 element = reader_get_element(This);
3221 *name = element->localname.str;
3222 *len = element->localname.len;
3223 break;
3224 case XmlNodeType_Attribute:
3225 if (This->attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3227 *name = xmlnsW;
3228 *len = 5;
3230 else if (This->attr->flags & ATTRIBUTE_NS_DEFINITION)
3232 const struct ns *ns = reader_lookup_ns(This, &This->attr->localname);
3233 *name = ns->prefix.str;
3234 *len = ns->prefix.len;
3236 else
3238 *name = This->attr->localname.str;
3239 *len = This->attr->localname.len;
3241 break;
3242 default:
3243 *name = This->strvalues[StringValue_LocalName].str;
3244 *len = This->strvalues[StringValue_LocalName].len;
3245 break;
3248 return S_OK;
3251 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3253 xmlreader *This = impl_from_IXmlReader(iface);
3254 XmlNodeType nodetype;
3255 UINT length;
3257 TRACE("(%p)->(%p %p)\n", This, ret, len);
3259 if (!len)
3260 len = &length;
3262 *ret = emptyW;
3263 *len = 0;
3265 switch ((nodetype = reader_get_nodetype(This)))
3267 case XmlNodeType_Element:
3268 case XmlNodeType_EndElement:
3269 case XmlNodeType_Attribute:
3271 const strval *prefix = &This->strvalues[StringValue_Prefix];
3272 struct ns *ns;
3274 if (strval_eq(This, prefix, &strval_xml))
3276 *ret = xmlW;
3277 *len = 3;
3279 else if (strval_eq(This, prefix, &strval_xmlns))
3281 *ret = xmlnsW;
3282 *len = 5;
3284 else if ((ns = reader_lookup_ns(This, prefix)))
3286 *ret = ns->prefix.str;
3287 *len = ns->prefix.len;
3290 break;
3292 default:
3296 return S_OK;
3299 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3301 strval *val;
3303 switch (reader_get_nodetype(reader))
3305 case XmlNodeType_XmlDeclaration:
3306 case XmlNodeType_EndElement:
3307 case XmlNodeType_None:
3308 return &strval_empty;
3309 case XmlNodeType_Attribute:
3310 /* For namespace definition attributes return values from namespace list */
3311 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3313 struct ns *ns;
3315 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3316 ns = reader_lookup_nsdef(reader);
3318 return &ns->uri;
3320 return &reader->attr->value;
3321 default:
3322 break;
3325 val = &reader->strvalues[StringValue_Value];
3326 if (!val->str && ensure_allocated)
3328 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3329 if (!ptr) return NULL;
3330 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3331 ptr[val->len] = 0;
3332 val->str = ptr;
3335 return val;
3338 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3340 xmlreader *reader = impl_from_IXmlReader(iface);
3341 const strval *val = &reader->strvalues[StringValue_Value];
3342 UINT off;
3344 TRACE("(%p)->(%p %p)\n", reader, value, len);
3346 *value = NULL;
3348 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3350 XmlNodeType type;
3351 HRESULT hr;
3353 hr = IXmlReader_Read(iface, &type);
3354 if (FAILED(hr)) return hr;
3356 /* return if still pending, partially read values are not reported */
3357 if (is_reader_pending(reader)) return E_PENDING;
3360 val = reader_get_value(reader, TRUE);
3361 if (!val)
3362 return E_OUTOFMEMORY;
3364 off = abs(reader->chunk_read_off);
3365 assert(off <= val->len);
3366 *value = val->str + off;
3367 if (len) *len = val->len - off;
3368 reader->chunk_read_off = -off;
3369 return S_OK;
3372 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3374 xmlreader *reader = impl_from_IXmlReader(iface);
3375 const strval *val;
3376 UINT len = 0;
3378 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3380 val = reader_get_value(reader, FALSE);
3382 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3383 if (reader->chunk_read_off >= 0)
3385 assert(reader->chunk_read_off <= val->len);
3386 len = min(val->len - reader->chunk_read_off, chunk_size);
3388 if (read) *read = len;
3390 if (len)
3392 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3393 reader->chunk_read_off += len;
3396 return len || !chunk_size ? S_OK : S_FALSE;
3399 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3400 LPCWSTR *baseUri,
3401 UINT *baseUri_length)
3403 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3404 return E_NOTIMPL;
3407 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3409 FIXME("(%p): stub\n", iface);
3410 return FALSE;
3413 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3415 xmlreader *This = impl_from_IXmlReader(iface);
3416 TRACE("(%p)\n", This);
3417 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3418 when current node is start tag of an element */
3419 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3422 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3424 xmlreader *This = impl_from_IXmlReader(iface);
3425 const struct element *element;
3427 TRACE("(%p %p)\n", This, line_number);
3429 if (!line_number)
3430 return E_INVALIDARG;
3432 switch (reader_get_nodetype(This))
3434 case XmlNodeType_Element:
3435 case XmlNodeType_EndElement:
3436 element = reader_get_element(This);
3437 *line_number = element->position.line_number;
3438 break;
3439 case XmlNodeType_Attribute:
3440 *line_number = This->attr->position.line_number;
3441 break;
3442 case XmlNodeType_Whitespace:
3443 case XmlNodeType_XmlDeclaration:
3444 *line_number = This->empty_element.position.line_number;
3445 break;
3446 default:
3447 *line_number = This->position.line_number;
3448 break;
3451 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3454 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3456 xmlreader *This = impl_from_IXmlReader(iface);
3457 const struct element *element;
3459 TRACE("(%p %p)\n", This, line_position);
3461 if (!line_position)
3462 return E_INVALIDARG;
3464 switch (reader_get_nodetype(This))
3466 case XmlNodeType_Element:
3467 case XmlNodeType_EndElement:
3468 element = reader_get_element(This);
3469 *line_position = element->position.line_position;
3470 break;
3471 case XmlNodeType_Attribute:
3472 *line_position = This->attr->position.line_position;
3473 break;
3474 case XmlNodeType_Whitespace:
3475 case XmlNodeType_XmlDeclaration:
3476 *line_position = This->empty_element.position.line_position;
3477 break;
3478 default:
3479 *line_position = This->position.line_position;
3480 break;
3483 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3486 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3488 xmlreader *This = impl_from_IXmlReader(iface);
3490 TRACE("(%p)->(%p)\n", This, count);
3492 if (!count) return E_INVALIDARG;
3494 *count = This->attr_count;
3495 return S_OK;
3498 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3500 xmlreader *This = impl_from_IXmlReader(iface);
3501 TRACE("(%p)->(%p)\n", This, depth);
3502 *depth = This->depth;
3503 return S_OK;
3506 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3508 xmlreader *This = impl_from_IXmlReader(iface);
3509 TRACE("(%p)\n", iface);
3510 return This->state == XmlReadState_EndOfFile;
3513 static const struct IXmlReaderVtbl xmlreader_vtbl =
3515 xmlreader_QueryInterface,
3516 xmlreader_AddRef,
3517 xmlreader_Release,
3518 xmlreader_SetInput,
3519 xmlreader_GetProperty,
3520 xmlreader_SetProperty,
3521 xmlreader_Read,
3522 xmlreader_GetNodeType,
3523 xmlreader_MoveToFirstAttribute,
3524 xmlreader_MoveToNextAttribute,
3525 xmlreader_MoveToAttributeByName,
3526 xmlreader_MoveToElement,
3527 xmlreader_GetQualifiedName,
3528 xmlreader_GetNamespaceUri,
3529 xmlreader_GetLocalName,
3530 xmlreader_GetPrefix,
3531 xmlreader_GetValue,
3532 xmlreader_ReadValueChunk,
3533 xmlreader_GetBaseUri,
3534 xmlreader_IsDefault,
3535 xmlreader_IsEmptyElement,
3536 xmlreader_GetLineNumber,
3537 xmlreader_GetLinePosition,
3538 xmlreader_GetAttributeCount,
3539 xmlreader_GetDepth,
3540 xmlreader_IsEOF
3543 /** IXmlReaderInput **/
3544 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3546 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3548 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3550 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3551 IsEqualGUID(riid, &IID_IUnknown))
3553 *ppvObject = iface;
3555 else
3557 WARN("interface %s not implemented\n", debugstr_guid(riid));
3558 *ppvObject = NULL;
3559 return E_NOINTERFACE;
3562 IUnknown_AddRef(iface);
3564 return S_OK;
3567 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3569 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3570 ULONG ref = InterlockedIncrement(&This->ref);
3571 TRACE("(%p)->(%d)\n", This, ref);
3572 return ref;
3575 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3577 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3578 LONG ref = InterlockedDecrement(&This->ref);
3580 TRACE("(%p)->(%d)\n", This, ref);
3582 if (ref == 0)
3584 IMalloc *imalloc = This->imalloc;
3585 if (This->input) IUnknown_Release(This->input);
3586 if (This->stream) ISequentialStream_Release(This->stream);
3587 if (This->buffer) free_input_buffer(This->buffer);
3588 readerinput_free(This, This->baseuri);
3589 readerinput_free(This, This);
3590 if (imalloc) IMalloc_Release(imalloc);
3593 return ref;
3596 static const struct IUnknownVtbl xmlreaderinputvtbl =
3598 xmlreaderinput_QueryInterface,
3599 xmlreaderinput_AddRef,
3600 xmlreaderinput_Release
3603 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3605 xmlreader *reader;
3606 HRESULT hr;
3607 int i;
3609 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3611 if (imalloc)
3612 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3613 else
3614 reader = heap_alloc(sizeof(*reader));
3615 if (!reader)
3616 return E_OUTOFMEMORY;
3618 memset(reader, 0, sizeof(*reader));
3619 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3620 reader->ref = 1;
3621 reader->state = XmlReadState_Closed;
3622 reader->instate = XmlReadInState_Initial;
3623 reader->resumestate = XmlReadResumeState_Initial;
3624 reader->dtdmode = DtdProcessing_Prohibit;
3625 reader->imalloc = imalloc;
3626 if (imalloc) IMalloc_AddRef(imalloc);
3627 reader->nodetype = XmlNodeType_None;
3628 list_init(&reader->attrs);
3629 list_init(&reader->nsdef);
3630 list_init(&reader->ns);
3631 list_init(&reader->elements);
3632 reader->max_depth = 256;
3634 reader->chunk_read_off = 0;
3635 for (i = 0; i < StringValue_Last; i++)
3636 reader->strvalues[i] = strval_empty;
3638 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3639 IXmlReader_Release(&reader->IXmlReader_iface);
3641 TRACE("returning iface %p, hr %#x\n", *obj, hr);
3643 return hr;
3646 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3647 IMalloc *imalloc,
3648 LPCWSTR encoding,
3649 BOOL hint,
3650 LPCWSTR base_uri,
3651 IXmlReaderInput **ppInput)
3653 xmlreaderinput *readerinput;
3654 HRESULT hr;
3656 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3657 hint, wine_dbgstr_w(base_uri), ppInput);
3659 if (!stream || !ppInput) return E_INVALIDARG;
3661 if (imalloc)
3662 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3663 else
3664 readerinput = heap_alloc(sizeof(*readerinput));
3665 if(!readerinput) return E_OUTOFMEMORY;
3667 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3668 readerinput->ref = 1;
3669 readerinput->imalloc = imalloc;
3670 readerinput->stream = NULL;
3671 if (imalloc) IMalloc_AddRef(imalloc);
3672 readerinput->encoding = parse_encoding_name(encoding, -1);
3673 readerinput->hint = hint;
3674 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3675 readerinput->pending = 0;
3677 hr = alloc_input_buffer(readerinput);
3678 if (hr != S_OK)
3680 readerinput_free(readerinput, readerinput->baseuri);
3681 readerinput_free(readerinput, readerinput);
3682 if (imalloc) IMalloc_Release(imalloc);
3683 return hr;
3685 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3687 *ppInput = &readerinput->IXmlReaderInput_iface;
3689 TRACE("returning iface %p\n", *ppInput);
3691 return S_OK;