ddraw/tests: Validate the "surface" pointer is unmodified after CreateSurface() witho...
[wine.git] / dlls / xmllite / reader.c
blob9bb0d56d346591f71702985b824fee2c1b249c27
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parsing states of the reader. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Index of a saved offset used to resume from a particular input position. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last   /* number of slots, not a slot itself */
} XmlReaderResume;
/* Index of a per-node string value; StringValue_Last is the number of slots. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 xml_encoding get_encoding_from_codepage(UINT codepage)
165 int i;
166 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
168 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
170 return XmlEncoding_Unknown;
173 typedef struct
175 char *data;
176 UINT cur;
177 unsigned int allocated;
178 unsigned int written;
179 } encoded_buffer;
181 typedef struct input_buffer input_buffer;
183 typedef struct
185 IXmlReaderInput IXmlReaderInput_iface;
186 LONG ref;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
188 IUnknown *input;
189 IMalloc *imalloc;
190 xml_encoding encoding;
191 BOOL hint;
192 WCHAR *baseuri;
193 /* stream reference set after SetInput() call from reader,
194 stored as sequential stream, cause currently
195 optimizations possible with IStream aren't implemented */
196 ISequentialStream *stream;
197 input_buffer *buffer;
198 unsigned int pending : 1;
199 } xmlreaderinput;
201 static const struct IUnknownVtbl xmlreaderinputvtbl;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
211 typedef struct
213 WCHAR *str; /* allocated null-terminated string */
214 UINT len; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start; /* input position where value starts */
216 } strval;
218 static WCHAR emptyW[] = {0};
219 static WCHAR xmlW[] = {'x','m','l',0};
220 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
221 static const strval strval_empty = { emptyW };
222 static const strval strval_xml = { xmlW, 3 };
223 static const strval strval_xmlns = { xmlnsW, 5 };
225 struct attribute
227 struct list entry;
228 strval prefix;
229 strval localname;
230 strval value;
233 struct element
235 struct list entry;
236 strval prefix;
237 strval localname;
238 strval qname;
241 struct ns
243 struct list entry;
244 strval prefix;
245 strval uri;
246 struct element *element;
249 typedef struct
251 IXmlReader IXmlReader_iface;
252 LONG ref;
253 xmlreaderinput *input;
254 IMalloc *imalloc;
255 XmlReadState state;
256 XmlReaderInternalState instate;
257 XmlReaderResumeState resumestate;
258 XmlNodeType nodetype;
259 DtdProcessing dtdmode;
260 IXmlResolver *resolver;
261 IUnknown *mlang;
262 UINT line, pos; /* reader position in XML stream */
263 struct list attrs; /* attributes list for current node */
264 struct attribute *attr; /* current attribute */
265 UINT attr_count;
266 struct list nsdef;
267 struct list ns;
268 struct list elements;
269 strval strvalues[StringValue_Last];
270 UINT depth;
271 UINT max_depth;
272 BOOL is_empty_element;
273 struct element empty_element;
274 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
275 } xmlreader;
277 struct input_buffer
279 encoded_buffer utf16;
280 encoded_buffer encoded;
281 UINT code_page;
282 xmlreaderinput *input;
285 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
287 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
290 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
292 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
295 /* reader memory allocation functions */
296 static inline void *reader_alloc(xmlreader *reader, size_t len)
298 return m_alloc(reader->imalloc, len);
301 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
303 void *ret = reader_alloc(reader, len);
304 if (ret)
305 memset(ret, 0, len);
306 return ret;
309 static inline void reader_free(xmlreader *reader, void *mem)
311 m_free(reader->imalloc, mem);
314 /* Just return pointer from offset, no attempt to read more. */
315 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
317 encoded_buffer *buffer = &reader->input->buffer->utf16;
318 return (WCHAR*)buffer->data + offset;
321 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
323 return v->str ? v->str : reader_get_ptr2(reader, v->start);
326 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
328 *dest = *src;
330 if (src->str != strval_empty.str)
332 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
333 if (!dest->str) return E_OUTOFMEMORY;
334 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
335 dest->str[dest->len] = 0;
336 dest->start = 0;
339 return S_OK;
342 /* reader input memory allocation functions */
343 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
345 return m_alloc(input->imalloc, len);
348 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
350 return m_realloc(input->imalloc, mem, len);
353 static inline void readerinput_free(xmlreaderinput *input, void *mem)
355 m_free(input->imalloc, mem);
358 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
360 LPWSTR ret = NULL;
362 if(str) {
363 DWORD size;
365 size = (strlenW(str)+1)*sizeof(WCHAR);
366 ret = readerinput_alloc(input, size);
367 if (ret) memcpy(ret, str, size);
370 return ret;
373 static void reader_clear_attrs(xmlreader *reader)
375 struct attribute *attr, *attr2;
376 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
378 reader_free(reader, attr);
380 list_init(&reader->attrs);
381 reader->attr_count = 0;
382 reader->attr = NULL;
385 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
386 while we are on a node with attributes */
387 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
389 struct attribute *attr;
391 attr = reader_alloc(reader, sizeof(*attr));
392 if (!attr) return E_OUTOFMEMORY;
394 if (prefix)
395 attr->prefix = *prefix;
396 else
397 memset(&attr->prefix, 0, sizeof(attr->prefix));
398 attr->localname = *localname;
399 attr->value = *value;
400 list_add_tail(&reader->attrs, &attr->entry);
401 reader->attr_count++;
403 return S_OK;
406 /* This one frees stored string value if needed */
407 static void reader_free_strvalued(xmlreader *reader, strval *v)
409 if (v->str != strval_empty.str)
411 reader_free(reader, v->str);
412 *v = strval_empty;
416 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
418 v->start = start;
419 v->len = len;
420 v->str = NULL;
423 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
425 return debugstr_wn(reader_get_strptr(reader, v), v->len);
428 /* used to initialize from constant string */
429 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
431 v->start = 0;
432 v->len = len;
433 v->str = str;
436 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
438 reader_free_strvalued(reader, &reader->strvalues[type]);
441 static void reader_free_strvalues(xmlreader *reader)
443 int type;
444 for (type = 0; type < StringValue_Last; type++)
445 reader_free_strvalue(reader, type);
448 /* This helper should only be used to test if strings are the same,
449 it doesn't try to sort. */
450 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
452 if (str1->len != str2->len) return 0;
453 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
456 static void reader_clear_elements(xmlreader *reader)
458 struct element *elem, *elem2;
459 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
461 reader_free_strvalued(reader, &elem->prefix);
462 reader_free_strvalued(reader, &elem->localname);
463 reader_free_strvalued(reader, &elem->qname);
464 reader_free(reader, elem);
466 list_init(&reader->elements);
467 reader->is_empty_element = FALSE;
470 static HRESULT reader_inc_depth(xmlreader *reader)
472 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
473 return S_OK;
476 static void reader_dec_depth(xmlreader *reader)
478 if (reader->depth > 1) reader->depth--;
481 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
483 struct ns *ns;
484 HRESULT hr;
486 ns = reader_alloc(reader, sizeof(*ns));
487 if (!ns) return E_OUTOFMEMORY;
489 if (def)
490 memset(&ns->prefix, 0, sizeof(ns->prefix));
491 else {
492 hr = reader_strvaldup(reader, prefix, &ns->prefix);
493 if (FAILED(hr)) {
494 reader_free(reader, ns);
495 return hr;
499 hr = reader_strvaldup(reader, uri, &ns->uri);
500 if (FAILED(hr)) {
501 reader_free_strvalued(reader, &ns->prefix);
502 reader_free(reader, ns);
503 return hr;
506 ns->element = NULL;
507 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
508 return hr;
511 static void reader_free_element(xmlreader *reader, struct element *element)
513 reader_free_strvalued(reader, &element->prefix);
514 reader_free_strvalued(reader, &element->localname);
515 reader_free_strvalued(reader, &element->qname);
516 reader_free(reader, element);
519 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
521 struct ns *ns;
523 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
524 if (ns->element)
525 break;
526 ns->element = element;
529 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
530 if (ns->element)
531 break;
532 ns->element = element;
536 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
537 strval *qname)
539 struct element *element;
540 HRESULT hr;
542 if (!list_empty(&reader->elements))
544 hr = reader_inc_depth(reader);
545 if (FAILED(hr))
546 return hr;
549 element = reader_alloc_zero(reader, sizeof(*element));
550 if (!element) {
551 hr = E_OUTOFMEMORY;
552 goto failed;
555 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
556 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
557 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
559 reader_free_element(reader, element);
560 goto failed;
563 list_add_head(&reader->elements, &element->entry);
564 reader_mark_ns_nodes(reader, element);
565 reader->is_empty_element = FALSE;
567 failed:
568 reader_dec_depth(reader);
569 return hr;
572 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
574 struct ns *ns, *ns2;
576 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
577 if (ns->element != element)
578 break;
580 list_remove(&ns->entry);
581 reader_free_strvalued(reader, &ns->prefix);
582 reader_free_strvalued(reader, &ns->uri);
583 reader_free(reader, ns);
586 if (!list_empty(&reader->nsdef)) {
587 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
588 if (ns->element == element) {
589 list_remove(&ns->entry);
590 reader_free_strvalued(reader, &ns->prefix);
591 reader_free_strvalued(reader, &ns->uri);
592 reader_free(reader, ns);
597 static void reader_pop_element(xmlreader *reader)
599 struct element *element;
601 if (list_empty(&reader->elements))
602 return;
604 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
605 list_remove(&element->entry);
607 reader_pop_ns_nodes(reader, element);
608 reader_free_element(reader, element);
609 reader_dec_depth(reader);
611 /* It was a root element, the rest is expected as Misc */
612 if (list_empty(&reader->elements))
613 reader->instate = XmlReadInState_MiscEnd;
616 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
617 means node value is to be determined. */
618 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
620 strval *v = &reader->strvalues[type];
622 reader_free_strvalue(reader, type);
623 if (!value)
625 v->str = NULL;
626 v->start = 0;
627 v->len = 0;
628 return;
631 if (value->str == strval_empty.str)
632 *v = *value;
633 else
635 if (type == StringValue_Value)
637 /* defer allocation for value string */
638 v->str = NULL;
639 v->start = value->start;
640 v->len = value->len;
642 else
644 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
645 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
646 v->str[value->len] = 0;
647 v->len = value->len;
652 static inline int is_reader_pending(xmlreader *reader)
654 return reader->input->pending;
657 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
659 const int initial_len = 0x2000;
660 buffer->data = readerinput_alloc(input, initial_len);
661 if (!buffer->data) return E_OUTOFMEMORY;
663 memset(buffer->data, 0, 4);
664 buffer->cur = 0;
665 buffer->allocated = initial_len;
666 buffer->written = 0;
668 return S_OK;
671 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
673 readerinput_free(input, buffer->data);
676 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
678 if (encoding == XmlEncoding_Unknown)
680 FIXME("unsupported encoding %d\n", encoding);
681 return E_NOTIMPL;
684 *cp = xml_encoding_map[encoding].cp;
686 return S_OK;
689 xml_encoding parse_encoding_name(const WCHAR *name, int len)
691 int min, max, n, c;
693 if (!name) return XmlEncoding_Unknown;
695 min = 0;
696 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
698 while (min <= max)
700 n = (min+max)/2;
702 if (len != -1)
703 c = strncmpiW(xml_encoding_map[n].name, name, len);
704 else
705 c = strcmpiW(xml_encoding_map[n].name, name);
706 if (!c)
707 return xml_encoding_map[n].enc;
709 if (c > 0)
710 max = n-1;
711 else
712 min = n+1;
715 return XmlEncoding_Unknown;
718 static HRESULT alloc_input_buffer(xmlreaderinput *input)
720 input_buffer *buffer;
721 HRESULT hr;
723 input->buffer = NULL;
725 buffer = readerinput_alloc(input, sizeof(*buffer));
726 if (!buffer) return E_OUTOFMEMORY;
728 buffer->input = input;
729 buffer->code_page = ~0; /* code page is unknown at this point */
730 hr = init_encoded_buffer(input, &buffer->utf16);
731 if (hr != S_OK) {
732 readerinput_free(input, buffer);
733 return hr;
736 hr = init_encoded_buffer(input, &buffer->encoded);
737 if (hr != S_OK) {
738 free_encoded_buffer(input, &buffer->utf16);
739 readerinput_free(input, buffer);
740 return hr;
743 input->buffer = buffer;
744 return S_OK;
747 static void free_input_buffer(input_buffer *buffer)
749 free_encoded_buffer(buffer->input, &buffer->encoded);
750 free_encoded_buffer(buffer->input, &buffer->utf16);
751 readerinput_free(buffer->input, buffer);
754 static void readerinput_release_stream(xmlreaderinput *readerinput)
756 if (readerinput->stream) {
757 ISequentialStream_Release(readerinput->stream);
758 readerinput->stream = NULL;
762 /* Queries already stored interface for IStream/ISequentialStream.
763 Interface supplied on creation will be overwritten */
764 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
766 HRESULT hr;
768 readerinput_release_stream(readerinput);
769 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
770 if (hr != S_OK)
771 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
773 return hr;
776 /* reads a chunk to raw buffer */
777 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
779 encoded_buffer *buffer = &readerinput->buffer->encoded;
780 /* to make sure aligned length won't exceed allocated length */
781 ULONG len = buffer->allocated - buffer->written - 4;
782 ULONG read;
783 HRESULT hr;
785 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
786 variable width encodings like UTF-8 */
787 len = (len + 3) & ~3;
788 /* try to use allocated space or grow */
789 if (buffer->allocated - buffer->written < len)
791 buffer->allocated *= 2;
792 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
793 len = buffer->allocated - buffer->written;
796 read = 0;
797 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
798 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
799 readerinput->pending = hr == E_PENDING;
800 if (FAILED(hr)) return hr;
801 buffer->written += read;
803 return hr;
806 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
807 static void readerinput_grow(xmlreaderinput *readerinput, int length)
809 encoded_buffer *buffer = &readerinput->buffer->utf16;
811 length *= sizeof(WCHAR);
812 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
813 if (buffer->allocated < buffer->written + length + 4)
815 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
816 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
817 buffer->allocated = grown_size;
821 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
823 static const char startA[] = {'<','?'};
824 static const char commentA[] = {'<','!'};
825 encoded_buffer *buffer = &readerinput->buffer->encoded;
826 unsigned char *ptr = (unsigned char*)buffer->data;
828 return !memcmp(buffer->data, startA, sizeof(startA)) ||
829 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
830 /* test start byte */
831 (ptr[0] == '<' &&
833 (ptr[1] && (ptr[1] <= 0x7f)) ||
834 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
835 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
836 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
840 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
842 encoded_buffer *buffer = &readerinput->buffer->encoded;
843 static const WCHAR startW[] = {'<','?'};
844 static const WCHAR commentW[] = {'<','!'};
845 static const char utf8bom[] = {0xef,0xbb,0xbf};
846 static const char utf16lebom[] = {0xff,0xfe};
848 *enc = XmlEncoding_Unknown;
850 if (buffer->written <= 3)
852 HRESULT hr = readerinput_growraw(readerinput);
853 if (FAILED(hr)) return hr;
854 if (buffer->written <= 3) return MX_E_INPUTEND;
857 /* try start symbols if we have enough data to do that, input buffer should contain
858 first chunk already */
859 if (readerinput_is_utf8(readerinput))
860 *enc = XmlEncoding_UTF8;
861 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
862 !memcmp(buffer->data, commentW, sizeof(commentW)))
863 *enc = XmlEncoding_UTF16;
864 /* try with BOM now */
865 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
867 buffer->cur += sizeof(utf8bom);
868 *enc = XmlEncoding_UTF8;
870 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
872 buffer->cur += sizeof(utf16lebom);
873 *enc = XmlEncoding_UTF16;
876 return S_OK;
879 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
881 encoded_buffer *buffer = &readerinput->buffer->encoded;
882 int len = buffer->written;
884 /* complete single byte char */
885 if (!(buffer->data[len-1] & 0x80)) return len;
887 /* find start byte of multibyte char */
888 while (--len && !(buffer->data[len] & 0xc0))
891 return len;
894 /* Returns byte length of complete char sequence for buffer code page,
895 it's relative to current buffer position which is currently used for BOM handling
896 only. */
897 static int readerinput_get_convlen(xmlreaderinput *readerinput)
899 encoded_buffer *buffer = &readerinput->buffer->encoded;
900 int len;
902 if (readerinput->buffer->code_page == CP_UTF8)
903 len = readerinput_get_utf8_convlen(readerinput);
904 else
905 len = buffer->written;
907 TRACE("%d\n", len - buffer->cur);
908 return len - buffer->cur;
911 /* It's possible that raw buffer has some leftovers from last conversion - some char
912 sequence that doesn't represent a full code point. Length argument should be calculated with
913 readerinput_get_convlen(), if it's -1 it will be calculated here. */
914 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
916 encoded_buffer *buffer = &readerinput->buffer->encoded;
918 if (len == -1)
919 len = readerinput_get_convlen(readerinput);
921 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
922 /* everything below cur is lost too */
923 buffer->written -= len + buffer->cur;
924 /* after this point we don't need cur offset really,
925 it's used only to mark where actual data begins when first chunk is read */
926 buffer->cur = 0;
929 /* note that raw buffer content is kept */
930 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
932 encoded_buffer *src = &readerinput->buffer->encoded;
933 encoded_buffer *dest = &readerinput->buffer->utf16;
934 int len, dest_len;
935 HRESULT hr;
936 WCHAR *ptr;
937 UINT cp;
939 hr = get_code_page(enc, &cp);
940 if (FAILED(hr)) return;
942 readerinput->buffer->code_page = cp;
943 len = readerinput_get_convlen(readerinput);
945 TRACE("switching to cp %d\n", cp);
947 /* just copy in this case */
948 if (enc == XmlEncoding_UTF16)
950 readerinput_grow(readerinput, len);
951 memcpy(dest->data, src->data + src->cur, len);
952 dest->written += len*sizeof(WCHAR);
953 return;
956 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
957 readerinput_grow(readerinput, dest_len);
958 ptr = (WCHAR*)dest->data;
959 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
960 ptr[dest_len] = 0;
961 dest->written += dest_len*sizeof(WCHAR);
964 /* shrinks parsed data a buffer begins with */
965 static void reader_shrink(xmlreader *reader)
967 encoded_buffer *buffer = &reader->input->buffer->utf16;
969 /* avoid to move too often using threshold shrink length */
970 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
972 buffer->written -= buffer->cur*sizeof(WCHAR);
973 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
974 buffer->cur = 0;
975 *(WCHAR*)&buffer->data[buffer->written] = 0;
979 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
980 It won't attempt to shrink but will grow destination buffer if needed */
981 static HRESULT reader_more(xmlreader *reader)
983 xmlreaderinput *readerinput = reader->input;
984 encoded_buffer *src = &readerinput->buffer->encoded;
985 encoded_buffer *dest = &readerinput->buffer->utf16;
986 UINT cp = readerinput->buffer->code_page;
987 int len, dest_len;
988 HRESULT hr;
989 WCHAR *ptr;
991 /* get some raw data from stream first */
992 hr = readerinput_growraw(readerinput);
993 len = readerinput_get_convlen(readerinput);
995 /* just copy for UTF-16 case */
996 if (cp == ~0)
998 readerinput_grow(readerinput, len);
999 memcpy(dest->data + dest->written, src->data + src->cur, len);
1000 dest->written += len*sizeof(WCHAR);
1001 return hr;
1004 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1005 readerinput_grow(readerinput, dest_len);
1006 ptr = (WCHAR*)(dest->data + dest->written);
1007 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1008 ptr[dest_len] = 0;
1009 dest->written += dest_len*sizeof(WCHAR);
1010 /* get rid of processed data */
1011 readerinput_shrinkraw(readerinput, len);
1013 return hr;
1016 static inline UINT reader_get_cur(xmlreader *reader)
1018 return reader->input->buffer->utf16.cur;
1021 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1023 encoded_buffer *buffer = &reader->input->buffer->utf16;
1024 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1025 if (!*ptr) reader_more(reader);
1026 return (WCHAR*)buffer->data + buffer->cur;
1029 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1031 int i=0;
1032 const WCHAR *ptr = reader_get_ptr(reader);
1033 while (str[i])
1035 if (!ptr[i])
1037 reader_more(reader);
1038 ptr = reader_get_ptr(reader);
1040 if (str[i] != ptr[i])
1041 return ptr[i] - str[i];
1042 i++;
1044 return 0;
1047 /* moves cursor n WCHARs forward */
1048 static void reader_skipn(xmlreader *reader, int n)
1050 encoded_buffer *buffer = &reader->input->buffer->utf16;
1051 const WCHAR *ptr = reader_get_ptr(reader);
1053 while (*ptr++ && n--)
1055 buffer->cur++;
1056 reader->pos++;
1060 static inline BOOL is_wchar_space(WCHAR ch)
1062 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1065 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1066 static int reader_skipspaces(xmlreader *reader)
1068 encoded_buffer *buffer = &reader->input->buffer->utf16;
1069 const WCHAR *ptr = reader_get_ptr(reader);
1070 UINT start = reader_get_cur(reader);
1072 while (is_wchar_space(*ptr))
1074 if (*ptr == '\r')
1075 reader->pos = 0;
1076 else if (*ptr == '\n')
1078 reader->line++;
1079 reader->pos = 0;
1081 else
1082 reader->pos++;
1084 buffer->cur++;
1085 ptr = reader_get_ptr(reader);
1088 return reader_get_cur(reader) - start;
1091 /* [26] VersionNum ::= '1.' [0-9]+ */
1092 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1094 static const WCHAR onedotW[] = {'1','.',0};
1095 WCHAR *ptr, *ptr2;
1096 UINT start;
1098 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1100 start = reader_get_cur(reader);
1101 /* skip "1." */
1102 reader_skipn(reader, 2);
1104 ptr2 = ptr = reader_get_ptr(reader);
1105 while (*ptr >= '0' && *ptr <= '9')
1107 reader_skipn(reader, 1);
1108 ptr = reader_get_ptr(reader);
1111 if (ptr2 == ptr) return WC_E_DIGIT;
1112 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1113 TRACE("version=%s\n", debug_strval(reader, val));
1114 return S_OK;
1117 /* [25] Eq ::= S? '=' S? */
1118 static HRESULT reader_parse_eq(xmlreader *reader)
1120 static const WCHAR eqW[] = {'=',0};
1121 reader_skipspaces(reader);
1122 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1123 /* skip '=' */
1124 reader_skipn(reader, 1);
1125 reader_skipspaces(reader);
1126 return S_OK;
1129 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1130 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1132 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1133 strval val, name;
1134 HRESULT hr;
1136 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1138 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1139 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1140 /* skip 'version' */
1141 reader_skipn(reader, 7);
1143 hr = reader_parse_eq(reader);
1144 if (FAILED(hr)) return hr;
1146 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1147 return WC_E_QUOTE;
1148 /* skip "'"|'"' */
1149 reader_skipn(reader, 1);
1151 hr = reader_parse_versionnum(reader, &val);
1152 if (FAILED(hr)) return hr;
1154 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1155 return WC_E_QUOTE;
1157 /* skip "'"|'"' */
1158 reader_skipn(reader, 1);
1160 return reader_add_attr(reader, NULL, &name, &val);
1163 /* ([A-Za-z0-9._] | '-') */
1164 static inline BOOL is_wchar_encname(WCHAR ch)
1166 return ((ch >= 'A' && ch <= 'Z') ||
1167 (ch >= 'a' && ch <= 'z') ||
1168 (ch >= '0' && ch <= '9') ||
1169 (ch == '.') || (ch == '_') ||
1170 (ch == '-'));
1173 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1174 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1176 WCHAR *start = reader_get_ptr(reader), *ptr;
1177 xml_encoding enc;
1178 int len;
1180 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1181 return WC_E_ENCNAME;
1183 val->start = reader_get_cur(reader);
1185 ptr = start;
1186 while (is_wchar_encname(*++ptr))
1189 len = ptr - start;
1190 enc = parse_encoding_name(start, len);
1191 TRACE("encoding name %s\n", debugstr_wn(start, len));
1192 val->str = start;
1193 val->len = len;
1195 if (enc == XmlEncoding_Unknown)
1196 return WC_E_ENCNAME;
1198 /* skip encoding name */
1199 reader_skipn(reader, len);
1200 return S_OK;
1203 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1204 static HRESULT reader_parse_encdecl(xmlreader *reader)
1206 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1207 strval name, val;
1208 HRESULT hr;
1210 if (!reader_skipspaces(reader)) return S_FALSE;
1212 if (reader_cmp(reader, encodingW)) return S_FALSE;
1213 name.str = reader_get_ptr(reader);
1214 name.start = reader_get_cur(reader);
1215 name.len = 8;
1216 /* skip 'encoding' */
1217 reader_skipn(reader, 8);
1219 hr = reader_parse_eq(reader);
1220 if (FAILED(hr)) return hr;
1222 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1223 return WC_E_QUOTE;
1224 /* skip "'"|'"' */
1225 reader_skipn(reader, 1);
1227 hr = reader_parse_encname(reader, &val);
1228 if (FAILED(hr)) return hr;
1230 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1231 return WC_E_QUOTE;
1233 /* skip "'"|'"' */
1234 reader_skipn(reader, 1);
1236 return reader_add_attr(reader, NULL, &name, &val);
1239 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1240 static HRESULT reader_parse_sddecl(xmlreader *reader)
1242 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1243 static const WCHAR yesW[] = {'y','e','s',0};
1244 static const WCHAR noW[] = {'n','o',0};
1245 strval name, val;
1246 UINT start;
1247 HRESULT hr;
1249 if (!reader_skipspaces(reader)) return S_FALSE;
1251 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1252 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1253 /* skip 'standalone' */
1254 reader_skipn(reader, 10);
1256 hr = reader_parse_eq(reader);
1257 if (FAILED(hr)) return hr;
1259 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1260 return WC_E_QUOTE;
1261 /* skip "'"|'"' */
1262 reader_skipn(reader, 1);
1264 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1265 return WC_E_XMLDECL;
1267 start = reader_get_cur(reader);
1268 /* skip 'yes'|'no' */
1269 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1270 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1271 TRACE("standalone=%s\n", debug_strval(reader, &val));
1273 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1274 return WC_E_QUOTE;
1275 /* skip "'"|'"' */
1276 reader_skipn(reader, 1);
1278 return reader_add_attr(reader, NULL, &name, &val);
1281 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1282 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1284 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1285 static const WCHAR declcloseW[] = {'?','>',0};
1286 HRESULT hr;
1288 /* check if we have "<?xml " */
1289 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1291 reader_skipn(reader, 5);
1292 hr = reader_parse_versioninfo(reader);
1293 if (FAILED(hr))
1294 return hr;
1296 hr = reader_parse_encdecl(reader);
1297 if (FAILED(hr))
1298 return hr;
1300 hr = reader_parse_sddecl(reader);
1301 if (FAILED(hr))
1302 return hr;
1304 reader_skipspaces(reader);
1305 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1306 reader_skipn(reader, 2);
1308 reader_inc_depth(reader);
1309 reader->nodetype = XmlNodeType_XmlDeclaration;
1310 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1311 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1312 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1314 return S_OK;
1317 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1318 static HRESULT reader_parse_comment(xmlreader *reader)
1320 WCHAR *ptr;
1321 UINT start;
1323 if (reader->resumestate == XmlReadResumeState_Comment)
1325 start = reader->resume[XmlReadResume_Body];
1326 ptr = reader_get_ptr(reader);
1328 else
1330 /* skip '<!--' */
1331 reader_skipn(reader, 4);
1332 reader_shrink(reader);
1333 ptr = reader_get_ptr(reader);
1334 start = reader_get_cur(reader);
1335 reader->nodetype = XmlNodeType_Comment;
1336 reader->resume[XmlReadResume_Body] = start;
1337 reader->resumestate = XmlReadResumeState_Comment;
1338 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1339 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1340 reader_set_strvalue(reader, StringValue_Value, NULL);
1343 /* will exit when there's no more data, it won't attempt to
1344 read more from stream */
1345 while (*ptr)
1347 if (ptr[0] == '-')
1349 if (ptr[1] == '-')
1351 if (ptr[2] == '>')
1353 strval value;
1355 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1356 TRACE("%s\n", debug_strval(reader, &value));
1358 /* skip rest of markup '->' */
1359 reader_skipn(reader, 3);
1361 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1362 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1363 reader_set_strvalue(reader, StringValue_Value, &value);
1364 reader->resume[XmlReadResume_Body] = 0;
1365 reader->resumestate = XmlReadResumeState_Initial;
1366 return S_OK;
1368 else
1369 return WC_E_COMMENT;
1373 reader_skipn(reader, 1);
1374 ptr++;
1377 return S_OK;
1380 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1381 static inline BOOL is_char(WCHAR ch)
1383 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1384 (ch >= 0x20 && ch <= 0xd7ff) ||
1385 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1386 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1387 (ch >= 0xe000 && ch <= 0xfffd);
1390 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1391 static inline BOOL is_pubchar(WCHAR ch)
1393 return (ch == ' ') ||
1394 (ch >= 'a' && ch <= 'z') ||
1395 (ch >= 'A' && ch <= 'Z') ||
1396 (ch >= '0' && ch <= '9') ||
1397 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1398 (ch == '=') || (ch == '?') ||
1399 (ch == '@') || (ch == '!') ||
1400 (ch >= '#' && ch <= '%') || /* #$% */
1401 (ch == '_') || (ch == '\r') || (ch == '\n');
1404 static inline BOOL is_namestartchar(WCHAR ch)
1406 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1407 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1408 (ch >= 0xc0 && ch <= 0xd6) ||
1409 (ch >= 0xd8 && ch <= 0xf6) ||
1410 (ch >= 0xf8 && ch <= 0x2ff) ||
1411 (ch >= 0x370 && ch <= 0x37d) ||
1412 (ch >= 0x37f && ch <= 0x1fff) ||
1413 (ch >= 0x200c && ch <= 0x200d) ||
1414 (ch >= 0x2070 && ch <= 0x218f) ||
1415 (ch >= 0x2c00 && ch <= 0x2fef) ||
1416 (ch >= 0x3001 && ch <= 0xd7ff) ||
1417 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1418 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1419 (ch >= 0xf900 && ch <= 0xfdcf) ||
1420 (ch >= 0xfdf0 && ch <= 0xfffd);
1423 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1424 static inline BOOL is_ncnamechar(WCHAR ch)
1426 return (ch >= 'A' && ch <= 'Z') ||
1427 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1428 (ch == '-') || (ch == '.') ||
1429 (ch >= '0' && ch <= '9') ||
1430 (ch == 0xb7) ||
1431 (ch >= 0xc0 && ch <= 0xd6) ||
1432 (ch >= 0xd8 && ch <= 0xf6) ||
1433 (ch >= 0xf8 && ch <= 0x2ff) ||
1434 (ch >= 0x300 && ch <= 0x36f) ||
1435 (ch >= 0x370 && ch <= 0x37d) ||
1436 (ch >= 0x37f && ch <= 0x1fff) ||
1437 (ch >= 0x200c && ch <= 0x200d) ||
1438 (ch >= 0x203f && ch <= 0x2040) ||
1439 (ch >= 0x2070 && ch <= 0x218f) ||
1440 (ch >= 0x2c00 && ch <= 0x2fef) ||
1441 (ch >= 0x3001 && ch <= 0xd7ff) ||
1442 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1443 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1444 (ch >= 0xf900 && ch <= 0xfdcf) ||
1445 (ch >= 0xfdf0 && ch <= 0xfffd);
1448 static inline BOOL is_namechar(WCHAR ch)
1450 return (ch == ':') || is_ncnamechar(ch);
1453 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1455 /* When we're on attribute always return attribute type, container node type is kept.
1456 Note that container is not necessarily an element, and attribute doesn't mean it's
1457 an attribute in XML spec terms. */
1458 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1461 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1462 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1463 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1464 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1465 [5] Name ::= NameStartChar (NameChar)* */
1466 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1468 WCHAR *ptr;
1469 UINT start;
1471 if (reader->resume[XmlReadResume_Name])
1473 start = reader->resume[XmlReadResume_Name];
1474 ptr = reader_get_ptr(reader);
1476 else
1478 ptr = reader_get_ptr(reader);
1479 start = reader_get_cur(reader);
1480 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1483 while (is_namechar(*ptr))
1485 reader_skipn(reader, 1);
1486 ptr = reader_get_ptr(reader);
1489 if (is_reader_pending(reader))
1491 reader->resume[XmlReadResume_Name] = start;
1492 return E_PENDING;
1494 else
1495 reader->resume[XmlReadResume_Name] = 0;
1497 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1498 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1500 return S_OK;
1503 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1504 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1506 static const WCHAR xmlW[] = {'x','m','l'};
1507 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1508 strval name;
1509 WCHAR *ptr;
1510 HRESULT hr;
1511 UINT i;
1513 hr = reader_parse_name(reader, &name);
1514 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1516 /* now that we got name check for illegal content */
1517 if (strval_eq(reader, &name, &xmlval))
1518 return WC_E_LEADINGXML;
1520 /* PITarget can't be a qualified name */
1521 ptr = reader_get_strptr(reader, &name);
1522 for (i = 0; i < name.len; i++)
1523 if (ptr[i] == ':')
1524 return i ? NC_E_NAMECOLON : WC_E_PI;
1526 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1527 *target = name;
1528 return S_OK;
1531 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1532 static HRESULT reader_parse_pi(xmlreader *reader)
1534 strval target;
1535 WCHAR *ptr;
1536 UINT start;
1537 HRESULT hr;
1539 switch (reader->resumestate)
1541 case XmlReadResumeState_Initial:
1542 /* skip '<?' */
1543 reader_skipn(reader, 2);
1544 reader_shrink(reader);
1545 reader->resumestate = XmlReadResumeState_PITarget;
1546 case XmlReadResumeState_PITarget:
1547 hr = reader_parse_pitarget(reader, &target);
1548 if (FAILED(hr)) return hr;
1549 reader_set_strvalue(reader, StringValue_LocalName, &target);
1550 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1551 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1552 reader->resumestate = XmlReadResumeState_PIBody;
1553 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1554 default:
1558 start = reader->resume[XmlReadResume_Body];
1559 ptr = reader_get_ptr(reader);
1560 while (*ptr)
1562 if (ptr[0] == '?')
1564 if (ptr[1] == '>')
1566 UINT cur = reader_get_cur(reader);
1567 strval value;
1569 /* strip all leading whitespace chars */
1570 while (start < cur)
1572 ptr = reader_get_ptr2(reader, start);
1573 if (!is_wchar_space(*ptr)) break;
1574 start++;
1577 reader_init_strvalue(start, cur-start, &value);
1579 /* skip '?>' */
1580 reader_skipn(reader, 2);
1581 TRACE("%s\n", debug_strval(reader, &value));
1582 reader->nodetype = XmlNodeType_ProcessingInstruction;
1583 reader->resumestate = XmlReadResumeState_Initial;
1584 reader->resume[XmlReadResume_Body] = 0;
1585 reader_set_strvalue(reader, StringValue_Value, &value);
1586 return S_OK;
1590 reader_skipn(reader, 1);
1591 ptr = reader_get_ptr(reader);
1594 return S_OK;
1597 /* This one is used to parse significant whitespace nodes, like in Misc production */
1598 static HRESULT reader_parse_whitespace(xmlreader *reader)
1600 switch (reader->resumestate)
1602 case XmlReadResumeState_Initial:
1603 reader_shrink(reader);
1604 reader->resumestate = XmlReadResumeState_Whitespace;
1605 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1606 reader->nodetype = XmlNodeType_Whitespace;
1607 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1608 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1609 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1610 /* fallthrough */
1611 case XmlReadResumeState_Whitespace:
1613 strval value;
1614 UINT start;
1616 reader_skipspaces(reader);
1617 if (is_reader_pending(reader)) return S_OK;
1619 start = reader->resume[XmlReadResume_Body];
1620 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1621 reader_set_strvalue(reader, StringValue_Value, &value);
1622 TRACE("%s\n", debug_strval(reader, &value));
1623 reader->resumestate = XmlReadResumeState_Initial;
1625 default:
1629 return S_OK;
1632 /* [27] Misc ::= Comment | PI | S */
1633 static HRESULT reader_parse_misc(xmlreader *reader)
1635 HRESULT hr = S_FALSE;
1637 if (reader->resumestate != XmlReadResumeState_Initial)
1639 hr = reader_more(reader);
1640 if (FAILED(hr)) return hr;
1642 /* finish current node */
1643 switch (reader->resumestate)
1645 case XmlReadResumeState_PITarget:
1646 case XmlReadResumeState_PIBody:
1647 return reader_parse_pi(reader);
1648 case XmlReadResumeState_Comment:
1649 return reader_parse_comment(reader);
1650 case XmlReadResumeState_Whitespace:
1651 return reader_parse_whitespace(reader);
1652 default:
1653 ERR("unknown resume state %d\n", reader->resumestate);
1657 while (1)
1659 const WCHAR *cur = reader_get_ptr(reader);
1661 if (is_wchar_space(*cur))
1662 hr = reader_parse_whitespace(reader);
1663 else if (!reader_cmp(reader, commentW))
1664 hr = reader_parse_comment(reader);
1665 else if (!reader_cmp(reader, piW))
1666 hr = reader_parse_pi(reader);
1667 else
1668 break;
1670 if (hr != S_FALSE) return hr;
1673 return hr;
1676 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1677 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1679 WCHAR *cur = reader_get_ptr(reader), quote;
1680 UINT start;
1682 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1684 quote = *cur;
1685 reader_skipn(reader, 1);
1687 cur = reader_get_ptr(reader);
1688 start = reader_get_cur(reader);
1689 while (is_char(*cur) && *cur != quote)
1691 reader_skipn(reader, 1);
1692 cur = reader_get_ptr(reader);
1694 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1695 if (*cur == quote) reader_skipn(reader, 1);
1697 TRACE("%s\n", debug_strval(reader, literal));
1698 return S_OK;
1701 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1702 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1703 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1705 WCHAR *cur = reader_get_ptr(reader), quote;
1706 UINT start;
1708 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1710 quote = *cur;
1711 reader_skipn(reader, 1);
1713 start = reader_get_cur(reader);
1714 cur = reader_get_ptr(reader);
1715 while (is_pubchar(*cur) && *cur != quote)
1717 reader_skipn(reader, 1);
1718 cur = reader_get_ptr(reader);
1720 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1721 if (*cur == quote) reader_skipn(reader, 1);
1723 TRACE("%s\n", debug_strval(reader, literal));
1724 return S_OK;
1727 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1728 static HRESULT reader_parse_externalid(xmlreader *reader)
1730 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1731 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1732 strval name, sys;
1733 HRESULT hr;
1734 int cnt;
1736 if (!reader_cmp(reader, publicW)) {
1737 strval pub;
1739 /* public id */
1740 reader_skipn(reader, 6);
1741 cnt = reader_skipspaces(reader);
1742 if (!cnt) return WC_E_WHITESPACE;
1744 hr = reader_parse_pub_literal(reader, &pub);
1745 if (FAILED(hr)) return hr;
1747 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1748 hr = reader_add_attr(reader, NULL, &name, &pub);
1749 if (FAILED(hr)) return hr;
1751 cnt = reader_skipspaces(reader);
1752 if (!cnt) return S_OK;
1754 /* optional system id */
1755 hr = reader_parse_sys_literal(reader, &sys);
1756 if (FAILED(hr)) return S_OK;
1758 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1759 hr = reader_add_attr(reader, NULL, &name, &sys);
1760 if (FAILED(hr)) return hr;
1762 return S_OK;
1763 } else if (!reader_cmp(reader, systemW)) {
1764 /* system id */
1765 reader_skipn(reader, 6);
1766 cnt = reader_skipspaces(reader);
1767 if (!cnt) return WC_E_WHITESPACE;
1769 hr = reader_parse_sys_literal(reader, &sys);
1770 if (FAILED(hr)) return hr;
1772 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1773 return reader_add_attr(reader, NULL, &name, &sys);
1776 return S_FALSE;
1779 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1780 static HRESULT reader_parse_dtd(xmlreader *reader)
1782 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1783 strval name;
1784 WCHAR *cur;
1785 HRESULT hr;
1787 /* check if we have "<!DOCTYPE" */
1788 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1789 reader_shrink(reader);
1791 /* DTD processing is not allowed by default */
1792 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1794 reader_skipn(reader, 9);
1795 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1797 /* name */
1798 hr = reader_parse_name(reader, &name);
1799 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1801 reader_skipspaces(reader);
1803 hr = reader_parse_externalid(reader);
1804 if (FAILED(hr)) return hr;
1806 reader_skipspaces(reader);
1808 cur = reader_get_ptr(reader);
1809 if (*cur != '>')
1811 FIXME("internal subset parsing not implemented\n");
1812 return E_NOTIMPL;
1815 /* skip '>' */
1816 reader_skipn(reader, 1);
1818 reader->nodetype = XmlNodeType_DocumentType;
1819 reader_set_strvalue(reader, StringValue_LocalName, &name);
1820 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1822 return S_OK;
1825 /* [11 NS] LocalPart ::= NCName */
1826 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1828 WCHAR *ptr;
1829 UINT start;
1831 if (reader->resume[XmlReadResume_Local])
1833 start = reader->resume[XmlReadResume_Local];
1834 ptr = reader_get_ptr(reader);
1836 else
1838 ptr = reader_get_ptr(reader);
1839 start = reader_get_cur(reader);
1842 while (is_ncnamechar(*ptr))
1844 reader_skipn(reader, 1);
1845 ptr = reader_get_ptr(reader);
1848 if (is_reader_pending(reader))
1850 reader->resume[XmlReadResume_Local] = start;
1851 return E_PENDING;
1853 else
1854 reader->resume[XmlReadResume_Local] = 0;
1856 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1858 return S_OK;
1861 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1862 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1863 [9 NS] UnprefixedName ::= LocalPart
1864 [10 NS] Prefix ::= NCName */
1865 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1867 WCHAR *ptr;
1868 UINT start;
1869 HRESULT hr;
1871 if (reader->resume[XmlReadResume_Name])
1873 start = reader->resume[XmlReadResume_Name];
1874 ptr = reader_get_ptr(reader);
1876 else
1878 ptr = reader_get_ptr(reader);
1879 start = reader_get_cur(reader);
1880 reader->resume[XmlReadResume_Name] = start;
1881 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1884 if (reader->resume[XmlReadResume_Local])
1886 hr = reader_parse_local(reader, local);
1887 if (FAILED(hr)) return hr;
1889 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1890 local->start - reader->resume[XmlReadResume_Name] - 1,
1891 prefix);
1893 else
1895 /* skip prefix part */
1896 while (is_ncnamechar(*ptr))
1898 reader_skipn(reader, 1);
1899 ptr = reader_get_ptr(reader);
1902 if (is_reader_pending(reader)) return E_PENDING;
1904 /* got a qualified name */
1905 if (*ptr == ':')
1907 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1909 /* skip ':' */
1910 reader_skipn(reader, 1);
1911 hr = reader_parse_local(reader, local);
1912 if (FAILED(hr)) return hr;
1914 else
1916 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1917 reader_init_strvalue(0, 0, prefix);
1921 if (prefix->len)
1922 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1923 else
1924 TRACE("ncname %s\n", debug_strval(reader, local));
1926 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1927 /* count ':' too */
1928 (prefix->len ? prefix->len + 1 : 0) + local->len,
1929 qname);
1931 reader->resume[XmlReadResume_Name] = 0;
1932 reader->resume[XmlReadResume_Local] = 0;
1934 return S_OK;
1937 /* Applies normalization rules to a single char, used for attribute values.
1939 Rules include 2 steps:
1941 1) replacing \r\n with a single \n;
1942 2) replacing all whitespace chars with ' '.
1945 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1947 encoded_buffer *buffer = &reader->input->buffer->utf16;
1949 if (!is_wchar_space(*ptr)) return;
1951 if (*ptr == '\r' && *(ptr+1) == '\n')
1953 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1954 memmove(ptr+1, ptr+2, len);
1956 *ptr = ' ';
1959 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1961 static const WCHAR entltW[] = {'l','t'};
1962 static const WCHAR entgtW[] = {'g','t'};
1963 static const WCHAR entampW[] = {'a','m','p'};
1964 static const WCHAR entaposW[] = {'a','p','o','s'};
1965 static const WCHAR entquotW[] = {'q','u','o','t'};
1966 static const strval lt = { (WCHAR*)entltW, 2 };
1967 static const strval gt = { (WCHAR*)entgtW, 2 };
1968 static const strval amp = { (WCHAR*)entampW, 3 };
1969 static const strval apos = { (WCHAR*)entaposW, 4 };
1970 static const strval quot = { (WCHAR*)entquotW, 4 };
1971 WCHAR *str = reader_get_strptr(reader, name);
1973 switch (*str)
1975 case 'l':
1976 if (strval_eq(reader, name, &lt)) return '<';
1977 break;
1978 case 'g':
1979 if (strval_eq(reader, name, &gt)) return '>';
1980 break;
1981 case 'a':
1982 if (strval_eq(reader, name, &amp))
1983 return '&';
1984 else if (strval_eq(reader, name, &apos))
1985 return '\'';
1986 break;
1987 case 'q':
1988 if (strval_eq(reader, name, &quot)) return '\"';
1989 break;
1990 default:
1994 return 0;
1997 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1998 [67] Reference ::= EntityRef | CharRef
1999 [68] EntityRef ::= '&' Name ';' */
2000 static HRESULT reader_parse_reference(xmlreader *reader)
2002 encoded_buffer *buffer = &reader->input->buffer->utf16;
2003 WCHAR *start = reader_get_ptr(reader), *ptr;
2004 UINT cur = reader_get_cur(reader);
2005 WCHAR ch = 0;
2006 int len;
2008 /* skip '&' */
2009 reader_skipn(reader, 1);
2010 ptr = reader_get_ptr(reader);
2012 if (*ptr == '#')
2014 reader_skipn(reader, 1);
2015 ptr = reader_get_ptr(reader);
2017 /* hex char or decimal */
2018 if (*ptr == 'x')
2020 reader_skipn(reader, 1);
2021 ptr = reader_get_ptr(reader);
2023 while (*ptr != ';')
2025 if ((*ptr >= '0' && *ptr <= '9'))
2026 ch = ch*16 + *ptr - '0';
2027 else if ((*ptr >= 'a' && *ptr <= 'f'))
2028 ch = ch*16 + *ptr - 'a' + 10;
2029 else if ((*ptr >= 'A' && *ptr <= 'F'))
2030 ch = ch*16 + *ptr - 'A' + 10;
2031 else
2032 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2033 reader_skipn(reader, 1);
2034 ptr = reader_get_ptr(reader);
2037 else
2039 while (*ptr != ';')
2041 if ((*ptr >= '0' && *ptr <= '9'))
2043 ch = ch*10 + *ptr - '0';
2044 reader_skipn(reader, 1);
2045 ptr = reader_get_ptr(reader);
2047 else
2048 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2052 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2054 /* normalize */
2055 if (is_wchar_space(ch)) ch = ' ';
2057 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2058 memmove(start+1, ptr+1, len);
2059 buffer->cur = cur + 1;
2061 *start = ch;
2063 else
2065 strval name;
2066 HRESULT hr;
2068 hr = reader_parse_name(reader, &name);
2069 if (FAILED(hr)) return hr;
2071 ptr = reader_get_ptr(reader);
2072 if (*ptr != ';') return WC_E_SEMICOLON;
2074 /* predefined entities resolve to a single character */
2075 ch = get_predefined_entity(reader, &name);
2076 if (ch)
2078 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2079 memmove(start+1, ptr+1, len);
2080 buffer->cur = cur + 1;
2082 *start = ch;
2084 else
2086 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2087 return WC_E_UNDECLAREDENTITY;
2092 return S_OK;
2095 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2096 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2098 WCHAR *ptr, quote;
2099 UINT start;
2101 ptr = reader_get_ptr(reader);
2103 /* skip opening quote */
2104 quote = *ptr;
2105 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2106 reader_skipn(reader, 1);
2108 ptr = reader_get_ptr(reader);
2109 start = reader_get_cur(reader);
2110 while (*ptr)
2112 if (*ptr == '<') return WC_E_LESSTHAN;
2114 if (*ptr == quote)
2116 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2117 /* skip closing quote */
2118 reader_skipn(reader, 1);
2119 return S_OK;
2122 if (*ptr == '&')
2124 HRESULT hr = reader_parse_reference(reader);
2125 if (FAILED(hr)) return hr;
2127 else
2129 reader_normalize_space(reader, ptr);
2130 reader_skipn(reader, 1);
2132 ptr = reader_get_ptr(reader);
2135 return WC_E_QUOTE;
2138 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2139 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2140 [3 NS] DefaultAttName ::= 'xmlns'
2141 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2142 static HRESULT reader_parse_attribute(xmlreader *reader)
2144 strval prefix, local, qname, value;
2145 BOOL ns = FALSE, nsdef = FALSE;
2146 HRESULT hr;
2148 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2149 if (FAILED(hr)) return hr;
2151 if (strval_eq(reader, &prefix, &strval_xmlns))
2152 ns = TRUE;
2154 if (strval_eq(reader, &qname, &strval_xmlns))
2155 ns = nsdef = TRUE;
2157 hr = reader_parse_eq(reader);
2158 if (FAILED(hr)) return hr;
2160 hr = reader_parse_attvalue(reader, &value);
2161 if (FAILED(hr)) return hr;
2163 if (ns)
2164 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2166 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2167 return reader_add_attr(reader, &prefix, &local, &value);
2170 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2171 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2172 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2174 HRESULT hr;
2176 hr = reader_parse_qname(reader, prefix, local, qname);
2177 if (FAILED(hr)) return hr;
2179 while (1)
2181 static const WCHAR endW[] = {'/','>',0};
2183 reader_skipspaces(reader);
2185 /* empty element */
2186 if ((*empty = !reader_cmp(reader, endW)))
2188 /* skip '/>' */
2189 reader_skipn(reader, 2);
2190 reader->is_empty_element = TRUE;
2191 reader->empty_element.prefix = *prefix;
2192 reader->empty_element.localname = *local;
2193 reader->empty_element.qname = *qname;
2194 reader_mark_ns_nodes(reader, &reader->empty_element);
2195 return S_OK;
2198 /* got a start tag */
2199 if (!reader_cmp(reader, gtW))
2201 /* skip '>' */
2202 reader_skipn(reader, 1);
2203 return reader_push_element(reader, prefix, local, qname);
2206 hr = reader_parse_attribute(reader);
2207 if (FAILED(hr)) return hr;
2210 return S_OK;
2213 /* [39] element ::= EmptyElemTag | STag content ETag */
2214 static HRESULT reader_parse_element(xmlreader *reader)
2216 HRESULT hr;
2218 switch (reader->resumestate)
2220 case XmlReadResumeState_Initial:
2221 /* check if we are really on element */
2222 if (reader_cmp(reader, ltW)) return S_FALSE;
2224 /* skip '<' */
2225 reader_skipn(reader, 1);
2227 reader_shrink(reader);
2228 reader->resumestate = XmlReadResumeState_STag;
2229 case XmlReadResumeState_STag:
2231 strval qname, prefix, local;
2232 int empty = 0;
2234 /* this handles empty elements too */
2235 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2236 if (FAILED(hr)) return hr;
2238 /* FIXME: need to check for defined namespace to reject invalid prefix */
2240 /* if we got empty element and stack is empty go straight to Misc */
2241 if (empty && list_empty(&reader->elements))
2242 reader->instate = XmlReadInState_MiscEnd;
2243 else
2244 reader->instate = XmlReadInState_Content;
2246 reader->nodetype = XmlNodeType_Element;
2247 reader->resumestate = XmlReadResumeState_Initial;
2248 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2249 reader_set_strvalue(reader, StringValue_LocalName, &local);
2250 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2251 break;
2253 default:
2254 hr = E_FAIL;
2257 return hr;
2260 /* [13 NS] ETag ::= '</' QName S? '>' */
2261 static HRESULT reader_parse_endtag(xmlreader *reader)
2263 strval prefix, local, qname;
2264 struct element *elem;
2265 HRESULT hr;
2267 /* skip '</' */
2268 reader_skipn(reader, 2);
2270 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2271 if (FAILED(hr)) return hr;
2273 reader_skipspaces(reader);
2275 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2277 /* skip '>' */
2278 reader_skipn(reader, 1);
2280 /* Element stack should never be empty at this point, cause we shouldn't get to
2281 content parsing if it's empty. */
2282 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2283 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2285 reader->nodetype = XmlNodeType_EndElement;
2286 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2287 reader_set_strvalue(reader, StringValue_LocalName, &local);
2288 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2290 return S_OK;
2293 /* [18] CDSect ::= CDStart CData CDEnd
2294 [19] CDStart ::= '<![CDATA['
2295 [20] CData ::= (Char* - (Char* ']]>' Char*))
2296 [21] CDEnd ::= ']]>' */
2297 static HRESULT reader_parse_cdata(xmlreader *reader)
2299 WCHAR *ptr;
2300 UINT start;
2302 if (reader->resumestate == XmlReadResumeState_CDATA)
2304 start = reader->resume[XmlReadResume_Body];
2305 ptr = reader_get_ptr(reader);
2307 else
2309 /* skip markup '<![CDATA[' */
2310 reader_skipn(reader, 9);
2311 reader_shrink(reader);
2312 ptr = reader_get_ptr(reader);
2313 start = reader_get_cur(reader);
2314 reader->nodetype = XmlNodeType_CDATA;
2315 reader->resume[XmlReadResume_Body] = start;
2316 reader->resumestate = XmlReadResumeState_CDATA;
2317 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2318 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2319 reader_set_strvalue(reader, StringValue_Value, NULL);
2322 while (*ptr)
2324 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2326 strval value;
2328 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2330 /* skip ']]>' */
2331 reader_skipn(reader, 3);
2332 TRACE("%s\n", debug_strval(reader, &value));
2334 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2335 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2336 reader_set_strvalue(reader, StringValue_Value, &value);
2337 reader->resume[XmlReadResume_Body] = 0;
2338 reader->resumestate = XmlReadResumeState_Initial;
2339 return S_OK;
2341 else
2343 /* Value normalization is not fully implemented, rules are:
2345 - single '\r' -> '\n';
2346 - sequence '\r\n' -> '\n', in this case value length changes;
2348 if (*ptr == '\r') *ptr = '\n';
2349 reader_skipn(reader, 1);
2350 ptr++;
2354 return S_OK;
2357 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2358 static HRESULT reader_parse_chardata(xmlreader *reader)
2360 WCHAR *ptr;
2361 UINT start;
2363 if (reader->resumestate == XmlReadResumeState_CharData)
2365 start = reader->resume[XmlReadResume_Body];
2366 ptr = reader_get_ptr(reader);
2368 else
2370 reader_shrink(reader);
2371 ptr = reader_get_ptr(reader);
2372 start = reader_get_cur(reader);
2373 /* There's no text */
2374 if (!*ptr || *ptr == '<') return S_OK;
2375 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2376 reader->resume[XmlReadResume_Body] = start;
2377 reader->resumestate = XmlReadResumeState_CharData;
2378 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2379 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2380 reader_set_strvalue(reader, StringValue_Value, NULL);
2383 while (*ptr)
2385 static const WCHAR ampW[] = {'&',0};
2387 /* CDATA closing sequence ']]>' is not allowed */
2388 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2389 return WC_E_CDSECTEND;
2391 /* Found next markup part */
2392 if (ptr[0] == '<')
2394 strval value;
2396 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2397 reader_set_strvalue(reader, StringValue_Value, &value);
2398 reader->resume[XmlReadResume_Body] = 0;
2399 reader->resumestate = XmlReadResumeState_Initial;
2400 return S_OK;
2403 /* this covers a case when text has leading whitespace chars */
2404 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2406 if (!reader_cmp(reader, ampW))
2407 reader_parse_reference(reader);
2408 else
2409 reader_skipn(reader, 1);
2411 ptr = reader_get_ptr(reader);
2414 return S_OK;
2417 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2418 static HRESULT reader_parse_content(xmlreader *reader)
2420 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2421 static const WCHAR etagW[] = {'<','/',0};
2423 if (reader->resumestate != XmlReadResumeState_Initial)
2425 switch (reader->resumestate)
2427 case XmlReadResumeState_CDATA:
2428 return reader_parse_cdata(reader);
2429 case XmlReadResumeState_Comment:
2430 return reader_parse_comment(reader);
2431 case XmlReadResumeState_PIBody:
2432 case XmlReadResumeState_PITarget:
2433 return reader_parse_pi(reader);
2434 case XmlReadResumeState_CharData:
2435 return reader_parse_chardata(reader);
2436 default:
2437 ERR("unknown resume state %d\n", reader->resumestate);
2441 reader_shrink(reader);
2443 /* handle end tag here, it indicates end of content as well */
2444 if (!reader_cmp(reader, etagW))
2445 return reader_parse_endtag(reader);
2447 if (!reader_cmp(reader, commentW))
2448 return reader_parse_comment(reader);
2450 if (!reader_cmp(reader, piW))
2451 return reader_parse_pi(reader);
2453 if (!reader_cmp(reader, cdstartW))
2454 return reader_parse_cdata(reader);
2456 if (!reader_cmp(reader, ltW))
2457 return reader_parse_element(reader);
2459 /* what's left must be CharData */
2460 return reader_parse_chardata(reader);
2463 static HRESULT reader_parse_nextnode(xmlreader *reader)
2465 XmlNodeType nodetype = reader_get_nodetype(reader);
2466 HRESULT hr;
2468 if (!is_reader_pending(reader))
2469 reader_clear_attrs(reader);
2471 /* When moving from EndElement or empty element, pop its own namespace definitions */
2472 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2473 reader_pop_ns_nodes(reader, &reader->empty_element);
2474 else if (nodetype == XmlNodeType_EndElement)
2475 reader_pop_element(reader);
2477 while (1)
2479 switch (reader->instate)
2481 /* if it's a first call for a new input we need to detect stream encoding */
2482 case XmlReadInState_Initial:
2484 xml_encoding enc;
2486 hr = readerinput_growraw(reader->input);
2487 if (FAILED(hr)) return hr;
2489 /* try to detect encoding by BOM or data and set input code page */
2490 hr = readerinput_detectencoding(reader->input, &enc);
2491 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2492 if (FAILED(hr)) return hr;
2494 /* always switch first time cause we have to put something in */
2495 readerinput_switchencoding(reader->input, enc);
2497 /* parse xml declaration */
2498 hr = reader_parse_xmldecl(reader);
2499 if (FAILED(hr)) return hr;
2501 readerinput_shrinkraw(reader->input, -1);
2502 reader->instate = XmlReadInState_Misc_DTD;
2503 if (hr == S_OK) return hr;
2505 break;
2506 case XmlReadInState_Misc_DTD:
2507 hr = reader_parse_misc(reader);
2508 if (FAILED(hr)) return hr;
2510 if (hr == S_FALSE)
2511 reader->instate = XmlReadInState_DTD;
2512 else
2513 return hr;
2514 break;
2515 case XmlReadInState_DTD:
2516 hr = reader_parse_dtd(reader);
2517 if (FAILED(hr)) return hr;
2519 if (hr == S_OK)
2521 reader->instate = XmlReadInState_DTD_Misc;
2522 return hr;
2524 else
2525 reader->instate = XmlReadInState_Element;
2526 break;
2527 case XmlReadInState_DTD_Misc:
2528 hr = reader_parse_misc(reader);
2529 if (FAILED(hr)) return hr;
2531 if (hr == S_FALSE)
2532 reader->instate = XmlReadInState_Element;
2533 else
2534 return hr;
2535 break;
2536 case XmlReadInState_Element:
2537 return reader_parse_element(reader);
2538 case XmlReadInState_Content:
2539 return reader_parse_content(reader);
2540 case XmlReadInState_MiscEnd:
2541 hr = reader_parse_misc(reader);
2542 if (FAILED(hr)) return hr;
2544 if (hr == S_FALSE)
2545 reader->instate = XmlReadInState_Eof;
2546 return hr;
2547 case XmlReadInState_Eof:
2548 return S_FALSE;
2549 default:
2550 FIXME("internal state %d not handled\n", reader->instate);
2551 return E_NOTIMPL;
2555 return E_NOTIMPL;
2558 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2560 xmlreader *This = impl_from_IXmlReader(iface);
2562 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2564 if (IsEqualGUID(riid, &IID_IUnknown) ||
2565 IsEqualGUID(riid, &IID_IXmlReader))
2567 *ppvObject = iface;
2569 else
2571 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2572 *ppvObject = NULL;
2573 return E_NOINTERFACE;
2576 IXmlReader_AddRef(iface);
2578 return S_OK;
2581 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2583 xmlreader *This = impl_from_IXmlReader(iface);
2584 ULONG ref = InterlockedIncrement(&This->ref);
2585 TRACE("(%p)->(%d)\n", This, ref);
2586 return ref;
2589 static void reader_clear_ns(xmlreader *reader)
2591 struct ns *ns, *ns2;
2593 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2594 reader_free_strvalued(reader, &ns->prefix);
2595 reader_free_strvalued(reader, &ns->uri);
2596 reader_free(reader, ns);
2599 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2600 reader_free_strvalued(reader, &ns->uri);
2601 reader_free(reader, ns);
2605 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2607 xmlreader *This = impl_from_IXmlReader(iface);
2608 LONG ref = InterlockedDecrement(&This->ref);
2610 TRACE("(%p)->(%d)\n", This, ref);
2612 if (ref == 0)
2614 IMalloc *imalloc = This->imalloc;
2615 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2616 if (This->resolver) IXmlResolver_Release(This->resolver);
2617 if (This->mlang) IUnknown_Release(This->mlang);
2618 reader_clear_attrs(This);
2619 reader_clear_ns(This);
2620 reader_clear_elements(This);
2621 reader_free_strvalues(This);
2622 reader_free(This, This);
2623 if (imalloc) IMalloc_Release(imalloc);
2626 return ref;
2629 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2631 xmlreader *This = impl_from_IXmlReader(iface);
2632 IXmlReaderInput *readerinput;
2633 HRESULT hr;
2635 TRACE("(%p)->(%p)\n", This, input);
2637 if (This->input)
2639 readerinput_release_stream(This->input);
2640 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2641 This->input = NULL;
2644 This->line = This->pos = 0;
2645 reader_clear_elements(This);
2646 This->depth = 0;
2647 This->resumestate = XmlReadResumeState_Initial;
2648 memset(This->resume, 0, sizeof(This->resume));
2650 /* just reset current input */
2651 if (!input)
2653 This->state = XmlReadState_Initial;
2654 return S_OK;
2657 /* now try IXmlReaderInput, ISequentialStream, IStream */
2658 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2659 if (hr == S_OK)
2661 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2662 This->input = impl_from_IXmlReaderInput(readerinput);
2663 else
2665 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2666 readerinput, readerinput->lpVtbl);
2667 IUnknown_Release(readerinput);
2668 return E_FAIL;
2673 if (hr != S_OK || !readerinput)
2675 /* create IXmlReaderInput basing on supplied interface */
2676 hr = CreateXmlReaderInputWithEncodingName(input,
2677 This->imalloc, NULL, FALSE, NULL, &readerinput);
2678 if (hr != S_OK) return hr;
2679 This->input = impl_from_IXmlReaderInput(readerinput);
2682 /* set stream for supplied IXmlReaderInput */
2683 hr = readerinput_query_for_stream(This->input);
2684 if (hr == S_OK)
2686 This->state = XmlReadState_Initial;
2687 This->instate = XmlReadInState_Initial;
2690 return hr;
2693 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2695 xmlreader *This = impl_from_IXmlReader(iface);
2697 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2699 if (!value) return E_INVALIDARG;
2701 switch (property)
2703 case XmlReaderProperty_MultiLanguage:
2704 *value = (LONG_PTR)This->mlang;
2705 if (This->mlang)
2706 IUnknown_AddRef(This->mlang);
2707 break;
2708 case XmlReaderProperty_XmlResolver:
2709 *value = (LONG_PTR)This->resolver;
2710 if (This->resolver)
2711 IXmlResolver_AddRef(This->resolver);
2712 break;
2713 case XmlReaderProperty_DtdProcessing:
2714 *value = This->dtdmode;
2715 break;
2716 case XmlReaderProperty_ReadState:
2717 *value = This->state;
2718 break;
2719 default:
2720 FIXME("Unimplemented property (%u)\n", property);
2721 return E_NOTIMPL;
2724 return S_OK;
2727 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2729 xmlreader *This = impl_from_IXmlReader(iface);
2731 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2733 switch (property)
2735 case XmlReaderProperty_MultiLanguage:
2736 if (This->mlang)
2737 IUnknown_Release(This->mlang);
2738 This->mlang = (IUnknown*)value;
2739 if (This->mlang)
2740 IUnknown_AddRef(This->mlang);
2741 if (This->mlang)
2742 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2743 break;
2744 case XmlReaderProperty_XmlResolver:
2745 if (This->resolver)
2746 IXmlResolver_Release(This->resolver);
2747 This->resolver = (IXmlResolver*)value;
2748 if (This->resolver)
2749 IXmlResolver_AddRef(This->resolver);
2750 break;
2751 case XmlReaderProperty_DtdProcessing:
2752 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2753 This->dtdmode = value;
2754 break;
2755 case XmlReaderProperty_MaxElementDepth:
2756 FIXME("Ignoring MaxElementDepth %ld\n", value);
2757 break;
2758 default:
2759 FIXME("Unimplemented property (%u)\n", property);
2760 return E_NOTIMPL;
2763 return S_OK;
2766 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2768 xmlreader *This = impl_from_IXmlReader(iface);
2769 XmlNodeType oldtype = This->nodetype;
2770 HRESULT hr;
2772 TRACE("(%p)->(%p)\n", This, nodetype);
2774 if (This->state == XmlReadState_Closed) return S_FALSE;
2776 hr = reader_parse_nextnode(This);
2777 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2778 This->state = XmlReadState_Interactive;
2779 if (hr == S_OK)
2781 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2782 if (nodetype)
2783 *nodetype = This->nodetype;
2786 return hr;
2789 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2791 xmlreader *This = impl_from_IXmlReader(iface);
2793 TRACE("(%p)->(%p)\n", This, node_type);
2795 if (!node_type)
2796 return E_INVALIDARG;
2798 *node_type = reader_get_nodetype(This);
2799 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2802 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2804 if (!reader->attr_count)
2805 return S_FALSE;
2807 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2808 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2809 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2810 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2812 return S_OK;
2815 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2817 xmlreader *This = impl_from_IXmlReader(iface);
2819 TRACE("(%p)\n", This);
2821 return reader_move_to_first_attribute(This);
2824 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2826 xmlreader *This = impl_from_IXmlReader(iface);
2827 const struct list *next;
2829 TRACE("(%p)\n", This);
2831 if (!This->attr_count) return S_FALSE;
2833 if (!This->attr)
2834 return reader_move_to_first_attribute(This);
2836 next = list_next(&This->attrs, &This->attr->entry);
2837 if (next)
2839 This->attr = LIST_ENTRY(next, struct attribute, entry);
2840 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2841 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2842 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2845 return next ? S_OK : S_FALSE;
2848 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2849 LPCWSTR local_name,
2850 LPCWSTR namespaceUri)
2852 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2853 return E_NOTIMPL;
2856 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2858 xmlreader *This = impl_from_IXmlReader(iface);
2860 TRACE("(%p)\n", This);
2862 if (!This->attr_count) return S_FALSE;
2863 This->attr = NULL;
2865 /* FIXME: support other node types with 'attributes' like DTD */
2866 if (This->is_empty_element) {
2867 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2868 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2870 else {
2871 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2872 if (element) {
2873 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2874 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2878 return S_OK;
2881 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2883 xmlreader *This = impl_from_IXmlReader(iface);
2885 TRACE("(%p)->(%p %p)\n", This, name, len);
2886 *name = This->strvalues[StringValue_QualifiedName].str;
2887 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2888 return S_OK;
2891 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2893 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2894 struct ns *ns;
2896 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2897 if (strval_eq(reader, prefix, &ns->prefix))
2898 return ns;
2901 return NULL;
2904 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2906 if (list_empty(&reader->nsdef))
2907 return NULL;
2909 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2912 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2914 xmlreader *This = impl_from_IXmlReader(iface);
2915 const strval *prefix = &This->strvalues[StringValue_Prefix];
2916 XmlNodeType nodetype;
2917 struct ns *ns;
2918 UINT length;
2920 TRACE("(%p %p %p)\n", iface, uri, len);
2922 if (!len)
2923 len = &length;
2925 *uri = NULL;
2926 *len = 0;
2928 switch ((nodetype = reader_get_nodetype(This)))
2930 case XmlNodeType_Attribute:
2932 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2933 '2','0','0','0','/','x','m','l','n','s','/',0};
2934 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2935 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2936 const strval *local = &This->strvalues[StringValue_LocalName];
2938 /* check for reserved prefixes first */
2939 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2940 strval_eq(This, prefix, &strval_xmlns))
2942 *uri = xmlns_uriW;
2943 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2945 else if (strval_eq(This, prefix, &strval_xml)) {
2946 *uri = xml_uriW;
2947 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2950 if (!*uri) {
2951 ns = reader_lookup_ns(This, prefix);
2952 if (ns) {
2953 *uri = ns->uri.str;
2954 *len = ns->uri.len;
2956 else {
2957 *uri = emptyW;
2958 *len = 0;
2962 break;
2963 case XmlNodeType_Element:
2964 case XmlNodeType_EndElement:
2966 ns = reader_lookup_ns(This, prefix);
2968 /* pick top default ns if any */
2969 if (!ns)
2970 ns = reader_lookup_nsdef(This);
2972 if (ns) {
2973 *uri = ns->uri.str;
2974 *len = ns->uri.len;
2976 else {
2977 *uri = emptyW;
2978 *len = 0;
2981 break;
2982 default:
2983 FIXME("Unhandled node type %d\n", nodetype);
2984 return E_NOTIMPL;
2987 return S_OK;
2990 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2992 xmlreader *This = impl_from_IXmlReader(iface);
2994 TRACE("(%p)->(%p %p)\n", This, name, len);
2995 *name = This->strvalues[StringValue_LocalName].str;
2996 if (len) *len = This->strvalues[StringValue_LocalName].len;
2997 return S_OK;
3000 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
3002 xmlreader *This = impl_from_IXmlReader(iface);
3004 TRACE("(%p)->(%p %p)\n", This, prefix, len);
3005 *prefix = This->strvalues[StringValue_Prefix].str;
3006 if (len) *len = This->strvalues[StringValue_Prefix].len;
3007 return S_OK;
3010 static BOOL is_namespace_definition(xmlreader *reader)
3012 const strval *local = &reader->strvalues[StringValue_LocalName];
3013 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3015 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3016 return FALSE;
3018 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3019 strval_eq(reader, prefix, &strval_xmlns));
3022 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3024 xmlreader *reader = impl_from_IXmlReader(iface);
3025 strval *val = &reader->strvalues[StringValue_Value];
3027 TRACE("(%p)->(%p %p)\n", reader, value, len);
3029 *value = NULL;
3031 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3033 XmlNodeType type;
3034 HRESULT hr;
3036 hr = IXmlReader_Read(iface, &type);
3037 if (FAILED(hr)) return hr;
3039 /* return if still pending, partially read values are not reported */
3040 if (is_reader_pending(reader)) return E_PENDING;
3043 if (!val->str)
3045 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3046 if (!ptr) return E_OUTOFMEMORY;
3047 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3048 ptr[val->len] = 0;
3049 val->str = ptr;
3052 /* For namespace definition attributes return values from namespace list */
3053 if (is_namespace_definition(reader)) {
3054 const strval *local = &reader->strvalues[StringValue_LocalName];
3055 struct ns *ns;
3057 ns = reader_lookup_ns(reader, local);
3058 if (!ns)
3059 ns = reader_lookup_nsdef(reader);
3061 val = &ns->uri;
3064 *value = val->str;
3065 if (len) *len = val->len;
3066 return S_OK;
3069 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3071 xmlreader *reader = impl_from_IXmlReader(iface);
3072 strval *val = &reader->strvalues[StringValue_Value];
3073 UINT len;
3075 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3077 /* Value is already allocated, chunked reads are not possible. */
3078 if (val->str) return S_FALSE;
3080 if (val->len)
3082 len = min(chunk_size, val->len);
3083 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3084 val->start += len;
3085 val->len -= len;
3086 if (read) *read = len;
3089 return S_OK;
3092 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3093 LPCWSTR *baseUri,
3094 UINT *baseUri_length)
3096 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3097 return E_NOTIMPL;
3100 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3102 FIXME("(%p): stub\n", iface);
3103 return FALSE;
3106 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3108 xmlreader *This = impl_from_IXmlReader(iface);
3109 TRACE("(%p)\n", This);
3110 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3111 when current node is start tag of an element */
3112 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3115 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3117 xmlreader *This = impl_from_IXmlReader(iface);
3119 TRACE("(%p %p)\n", This, lineNumber);
3121 if (!lineNumber) return E_INVALIDARG;
3123 *lineNumber = This->line;
3125 return S_OK;
3128 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3130 xmlreader *This = impl_from_IXmlReader(iface);
3132 TRACE("(%p %p)\n", This, linePosition);
3134 if (!linePosition) return E_INVALIDARG;
3136 *linePosition = This->pos;
3138 return S_OK;
3141 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3143 xmlreader *This = impl_from_IXmlReader(iface);
3145 TRACE("(%p)->(%p)\n", This, count);
3147 if (!count) return E_INVALIDARG;
3149 *count = This->attr_count;
3150 return S_OK;
3153 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3155 xmlreader *This = impl_from_IXmlReader(iface);
3156 TRACE("(%p)->(%p)\n", This, depth);
3157 *depth = This->depth;
3158 return S_OK;
3161 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3163 FIXME("(%p): stub\n", iface);
3164 return FALSE;
3167 static const struct IXmlReaderVtbl xmlreader_vtbl =
3169 xmlreader_QueryInterface,
3170 xmlreader_AddRef,
3171 xmlreader_Release,
3172 xmlreader_SetInput,
3173 xmlreader_GetProperty,
3174 xmlreader_SetProperty,
3175 xmlreader_Read,
3176 xmlreader_GetNodeType,
3177 xmlreader_MoveToFirstAttribute,
3178 xmlreader_MoveToNextAttribute,
3179 xmlreader_MoveToAttributeByName,
3180 xmlreader_MoveToElement,
3181 xmlreader_GetQualifiedName,
3182 xmlreader_GetNamespaceUri,
3183 xmlreader_GetLocalName,
3184 xmlreader_GetPrefix,
3185 xmlreader_GetValue,
3186 xmlreader_ReadValueChunk,
3187 xmlreader_GetBaseUri,
3188 xmlreader_IsDefault,
3189 xmlreader_IsEmptyElement,
3190 xmlreader_GetLineNumber,
3191 xmlreader_GetLinePosition,
3192 xmlreader_GetAttributeCount,
3193 xmlreader_GetDepth,
3194 xmlreader_IsEOF
3197 /** IXmlReaderInput **/
3198 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3200 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3202 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3204 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3205 IsEqualGUID(riid, &IID_IUnknown))
3207 *ppvObject = iface;
3209 else
3211 WARN("interface %s not implemented\n", debugstr_guid(riid));
3212 *ppvObject = NULL;
3213 return E_NOINTERFACE;
3216 IUnknown_AddRef(iface);
3218 return S_OK;
3221 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3223 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3224 ULONG ref = InterlockedIncrement(&This->ref);
3225 TRACE("(%p)->(%d)\n", This, ref);
3226 return ref;
3229 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3231 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3232 LONG ref = InterlockedDecrement(&This->ref);
3234 TRACE("(%p)->(%d)\n", This, ref);
3236 if (ref == 0)
3238 IMalloc *imalloc = This->imalloc;
3239 if (This->input) IUnknown_Release(This->input);
3240 if (This->stream) ISequentialStream_Release(This->stream);
3241 if (This->buffer) free_input_buffer(This->buffer);
3242 readerinput_free(This, This->baseuri);
3243 readerinput_free(This, This);
3244 if (imalloc) IMalloc_Release(imalloc);
3247 return ref;
3250 static const struct IUnknownVtbl xmlreaderinputvtbl =
3252 xmlreaderinput_QueryInterface,
3253 xmlreaderinput_AddRef,
3254 xmlreaderinput_Release
3257 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3259 xmlreader *reader;
3260 int i;
3262 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3264 if (!IsEqualGUID(riid, &IID_IXmlReader))
3266 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3267 return E_FAIL;
3270 if (imalloc)
3271 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3272 else
3273 reader = heap_alloc(sizeof(*reader));
3274 if(!reader) return E_OUTOFMEMORY;
3276 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3277 reader->ref = 1;
3278 reader->input = NULL;
3279 reader->state = XmlReadState_Closed;
3280 reader->instate = XmlReadInState_Initial;
3281 reader->resumestate = XmlReadResumeState_Initial;
3282 reader->dtdmode = DtdProcessing_Prohibit;
3283 reader->resolver = NULL;
3284 reader->mlang = NULL;
3285 reader->line = reader->pos = 0;
3286 reader->imalloc = imalloc;
3287 if (imalloc) IMalloc_AddRef(imalloc);
3288 reader->nodetype = XmlNodeType_None;
3289 list_init(&reader->attrs);
3290 reader->attr_count = 0;
3291 reader->attr = NULL;
3292 list_init(&reader->nsdef);
3293 list_init(&reader->ns);
3294 list_init(&reader->elements);
3295 reader->depth = 0;
3296 reader->max_depth = 256;
3297 reader->is_empty_element = FALSE;
3298 memset(reader->resume, 0, sizeof(reader->resume));
3300 for (i = 0; i < StringValue_Last; i++)
3301 reader->strvalues[i] = strval_empty;
3303 *obj = &reader->IXmlReader_iface;
3305 TRACE("returning iface %p\n", *obj);
3307 return S_OK;
3310 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3311 IMalloc *imalloc,
3312 LPCWSTR encoding,
3313 BOOL hint,
3314 LPCWSTR base_uri,
3315 IXmlReaderInput **ppInput)
3317 xmlreaderinput *readerinput;
3318 HRESULT hr;
3320 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3321 hint, wine_dbgstr_w(base_uri), ppInput);
3323 if (!stream || !ppInput) return E_INVALIDARG;
3325 if (imalloc)
3326 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3327 else
3328 readerinput = heap_alloc(sizeof(*readerinput));
3329 if(!readerinput) return E_OUTOFMEMORY;
3331 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3332 readerinput->ref = 1;
3333 readerinput->imalloc = imalloc;
3334 readerinput->stream = NULL;
3335 if (imalloc) IMalloc_AddRef(imalloc);
3336 readerinput->encoding = parse_encoding_name(encoding, -1);
3337 readerinput->hint = hint;
3338 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3339 readerinput->pending = 0;
3341 hr = alloc_input_buffer(readerinput);
3342 if (hr != S_OK)
3344 readerinput_free(readerinput, readerinput->baseuri);
3345 readerinput_free(readerinput, readerinput);
3346 if (imalloc) IMalloc_Release(imalloc);
3347 return hr;
3349 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3351 *ppInput = &readerinput->IXmlReaderInput_iface;
3353 TRACE("returning iface %p\n", *ppInput);
3355 return S_OK;