ddraw/tests: Rewrite StructSizeTest().
[wine.git] / dlls / xmllite / reader.c
blobde61d135ab14f91d01dd150494d4c6050cc73d3d
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parser state, kept in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information (kept in
   xmlreader.resumestate). */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Index into xmlreader.resume[]: saved buffer offsets used to resume from a
   particular input position. XmlReadResume_Last is the array size, not a slot. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
/* Index into xmlreader.strvalues[]. StringValue_Last is the array size and
   the upper bound used when all values are freed. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 xml_encoding get_encoding_from_codepage(UINT codepage)
165 int i;
166 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
168 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
170 return XmlEncoding_Unknown;
173 typedef struct
175 char *data;
176 UINT cur;
177 unsigned int allocated;
178 unsigned int written;
179 } encoded_buffer;
181 typedef struct input_buffer input_buffer;
183 typedef struct
185 IXmlReaderInput IXmlReaderInput_iface;
186 LONG ref;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
188 IUnknown *input;
189 IMalloc *imalloc;
190 xml_encoding encoding;
191 BOOL hint;
192 WCHAR *baseuri;
193 /* stream reference set after SetInput() call from reader,
194 stored as sequential stream, cause currently
195 optimizations possible with IStream aren't implemented */
196 ISequentialStream *stream;
197 input_buffer *buffer;
198 unsigned int pending : 1;
199 } xmlreaderinput;
201 static const struct IUnknownVtbl xmlreaderinputvtbl;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
211 typedef struct
213 WCHAR *str; /* allocated null-terminated string */
214 UINT len; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start; /* input position where value starts */
216 } strval;
218 static WCHAR emptyW[] = {0};
219 static WCHAR xmlW[] = {'x','m','l',0};
220 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
221 static const strval strval_empty = { emptyW };
222 static const strval strval_xml = { xmlW, 3 };
223 static const strval strval_xmlns = { xmlnsW, 5 };
225 struct attribute
227 struct list entry;
228 strval prefix;
229 strval localname;
230 strval value;
233 struct element
235 struct list entry;
236 strval prefix;
237 strval localname;
238 strval qname;
241 struct ns
243 struct list entry;
244 strval prefix;
245 strval uri;
246 struct element *element;
249 typedef struct
251 IXmlReader IXmlReader_iface;
252 LONG ref;
253 xmlreaderinput *input;
254 IMalloc *imalloc;
255 XmlReadState state;
256 XmlReaderInternalState instate;
257 XmlReaderResumeState resumestate;
258 XmlNodeType nodetype;
259 DtdProcessing dtdmode;
260 IXmlResolver *resolver;
261 IUnknown *mlang;
262 UINT line, pos; /* reader position in XML stream */
263 struct list attrs; /* attributes list for current node */
264 struct attribute *attr; /* current attribute */
265 UINT attr_count;
266 struct list nsdef;
267 struct list ns;
268 struct list elements;
269 strval strvalues[StringValue_Last];
270 UINT depth;
271 UINT max_depth;
272 BOOL is_empty_element;
273 struct element empty_element;
274 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
275 } xmlreader;
277 struct input_buffer
279 encoded_buffer utf16;
280 encoded_buffer encoded;
281 UINT code_page;
282 xmlreaderinput *input;
285 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
287 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
290 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
292 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
295 /* reader memory allocation functions */
296 static inline void *reader_alloc(xmlreader *reader, size_t len)
298 return m_alloc(reader->imalloc, len);
301 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
303 void *ret = reader_alloc(reader, len);
304 if (ret)
305 memset(ret, 0, len);
306 return ret;
309 static inline void reader_free(xmlreader *reader, void *mem)
311 m_free(reader->imalloc, mem);
314 /* Just return pointer from offset, no attempt to read more. */
315 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
317 encoded_buffer *buffer = &reader->input->buffer->utf16;
318 return (WCHAR*)buffer->data + offset;
321 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
323 return v->str ? v->str : reader_get_ptr2(reader, v->start);
326 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
328 *dest = *src;
330 if (src->str != strval_empty.str)
332 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
333 if (!dest->str) return E_OUTOFMEMORY;
334 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
335 dest->str[dest->len] = 0;
336 dest->start = 0;
339 return S_OK;
342 /* reader input memory allocation functions */
343 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
345 return m_alloc(input->imalloc, len);
348 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
350 return m_realloc(input->imalloc, mem, len);
353 static inline void readerinput_free(xmlreaderinput *input, void *mem)
355 m_free(input->imalloc, mem);
358 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
360 LPWSTR ret = NULL;
362 if(str) {
363 DWORD size;
365 size = (strlenW(str)+1)*sizeof(WCHAR);
366 ret = readerinput_alloc(input, size);
367 if (ret) memcpy(ret, str, size);
370 return ret;
373 static void reader_clear_attrs(xmlreader *reader)
375 struct attribute *attr, *attr2;
376 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
378 reader_free(reader, attr);
380 list_init(&reader->attrs);
381 reader->attr_count = 0;
382 reader->attr = NULL;
385 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
386 while we are on a node with attributes */
387 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
389 struct attribute *attr;
391 attr = reader_alloc(reader, sizeof(*attr));
392 if (!attr) return E_OUTOFMEMORY;
394 if (prefix)
395 attr->prefix = *prefix;
396 else
397 memset(&attr->prefix, 0, sizeof(attr->prefix));
398 attr->localname = *localname;
399 attr->value = *value;
400 list_add_tail(&reader->attrs, &attr->entry);
401 reader->attr_count++;
403 return S_OK;
406 /* This one frees stored string value if needed */
407 static void reader_free_strvalued(xmlreader *reader, strval *v)
409 if (v->str != strval_empty.str)
411 reader_free(reader, v->str);
412 *v = strval_empty;
416 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
418 v->start = start;
419 v->len = len;
420 v->str = NULL;
423 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
425 return debugstr_wn(reader_get_strptr(reader, v), v->len);
428 /* used to initialize from constant string */
429 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
431 v->start = 0;
432 v->len = len;
433 v->str = str;
436 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
438 reader_free_strvalued(reader, &reader->strvalues[type]);
441 static void reader_free_strvalues(xmlreader *reader)
443 int type;
444 for (type = 0; type < StringValue_Last; type++)
445 reader_free_strvalue(reader, type);
448 /* This helper should only be used to test if strings are the same,
449 it doesn't try to sort. */
450 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
452 if (str1->len != str2->len) return 0;
453 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
456 static void reader_clear_elements(xmlreader *reader)
458 struct element *elem, *elem2;
459 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
461 reader_free_strvalued(reader, &elem->prefix);
462 reader_free_strvalued(reader, &elem->localname);
463 reader_free_strvalued(reader, &elem->qname);
464 reader_free(reader, elem);
466 list_init(&reader->elements);
467 reader->is_empty_element = FALSE;
470 static HRESULT reader_inc_depth(xmlreader *reader)
472 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
473 return S_OK;
476 static void reader_dec_depth(xmlreader *reader)
478 if (reader->depth > 1) reader->depth--;
481 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
483 struct ns *ns;
484 HRESULT hr;
486 ns = reader_alloc(reader, sizeof(*ns));
487 if (!ns) return E_OUTOFMEMORY;
489 if (def)
490 memset(&ns->prefix, 0, sizeof(ns->prefix));
491 else {
492 hr = reader_strvaldup(reader, prefix, &ns->prefix);
493 if (FAILED(hr)) {
494 reader_free(reader, ns);
495 return hr;
499 hr = reader_strvaldup(reader, uri, &ns->uri);
500 if (FAILED(hr)) {
501 reader_free_strvalued(reader, &ns->prefix);
502 reader_free(reader, ns);
503 return hr;
506 ns->element = NULL;
507 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
508 return hr;
511 static void reader_free_element(xmlreader *reader, struct element *element)
513 reader_free_strvalued(reader, &element->prefix);
514 reader_free_strvalued(reader, &element->localname);
515 reader_free_strvalued(reader, &element->qname);
516 reader_free(reader, element);
519 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
521 struct ns *ns;
523 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
524 if (ns->element)
525 break;
526 ns->element = element;
529 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
530 if (ns->element)
531 break;
532 ns->element = element;
536 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
537 strval *qname)
539 struct element *element;
540 HRESULT hr;
542 if (!list_empty(&reader->elements))
544 hr = reader_inc_depth(reader);
545 if (FAILED(hr))
546 return hr;
549 element = reader_alloc_zero(reader, sizeof(*element));
550 if (!element) {
551 hr = E_OUTOFMEMORY;
552 goto failed;
555 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
556 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
557 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
559 reader_free_element(reader, element);
560 goto failed;
563 list_add_head(&reader->elements, &element->entry);
564 reader_mark_ns_nodes(reader, element);
565 reader->is_empty_element = FALSE;
567 failed:
568 reader_dec_depth(reader);
569 return hr;
572 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
574 struct ns *ns, *ns2;
576 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
577 if (ns->element != element)
578 break;
580 list_remove(&ns->entry);
581 reader_free_strvalued(reader, &ns->prefix);
582 reader_free_strvalued(reader, &ns->uri);
583 reader_free(reader, ns);
586 if (!list_empty(&reader->nsdef)) {
587 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
588 if (ns->element == element) {
589 list_remove(&ns->entry);
590 reader_free_strvalued(reader, &ns->prefix);
591 reader_free_strvalued(reader, &ns->uri);
592 reader_free(reader, ns);
597 static void reader_pop_element(xmlreader *reader)
599 struct element *element;
601 if (list_empty(&reader->elements))
602 return;
604 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
605 list_remove(&element->entry);
607 reader_pop_ns_nodes(reader, element);
608 reader_free_element(reader, element);
609 reader_dec_depth(reader);
611 /* It was a root element, the rest is expected as Misc */
612 if (list_empty(&reader->elements))
613 reader->instate = XmlReadInState_MiscEnd;
616 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
617 means node value is to be determined. */
618 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
620 strval *v = &reader->strvalues[type];
622 reader_free_strvalue(reader, type);
623 if (!value)
625 v->str = NULL;
626 v->start = 0;
627 v->len = 0;
628 return;
631 if (value->str == strval_empty.str)
632 *v = *value;
633 else
635 if (type == StringValue_Value)
637 /* defer allocation for value string */
638 v->str = NULL;
639 v->start = value->start;
640 v->len = value->len;
642 else
644 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
645 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
646 v->str[value->len] = 0;
647 v->len = value->len;
652 static inline int is_reader_pending(xmlreader *reader)
654 return reader->input->pending;
657 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
659 const int initial_len = 0x2000;
660 buffer->data = readerinput_alloc(input, initial_len);
661 if (!buffer->data) return E_OUTOFMEMORY;
663 memset(buffer->data, 0, 4);
664 buffer->cur = 0;
665 buffer->allocated = initial_len;
666 buffer->written = 0;
668 return S_OK;
671 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
673 readerinput_free(input, buffer->data);
676 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
678 if (encoding == XmlEncoding_Unknown)
680 FIXME("unsupported encoding %d\n", encoding);
681 return E_NOTIMPL;
684 *cp = xml_encoding_map[encoding].cp;
686 return S_OK;
689 xml_encoding parse_encoding_name(const WCHAR *name, int len)
691 int min, max, n, c;
693 if (!name) return XmlEncoding_Unknown;
695 min = 0;
696 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
698 while (min <= max)
700 n = (min+max)/2;
702 if (len != -1)
703 c = strncmpiW(xml_encoding_map[n].name, name, len);
704 else
705 c = strcmpiW(xml_encoding_map[n].name, name);
706 if (!c)
707 return xml_encoding_map[n].enc;
709 if (c > 0)
710 max = n-1;
711 else
712 min = n+1;
715 return XmlEncoding_Unknown;
718 static HRESULT alloc_input_buffer(xmlreaderinput *input)
720 input_buffer *buffer;
721 HRESULT hr;
723 input->buffer = NULL;
725 buffer = readerinput_alloc(input, sizeof(*buffer));
726 if (!buffer) return E_OUTOFMEMORY;
728 buffer->input = input;
729 buffer->code_page = ~0; /* code page is unknown at this point */
730 hr = init_encoded_buffer(input, &buffer->utf16);
731 if (hr != S_OK) {
732 readerinput_free(input, buffer);
733 return hr;
736 hr = init_encoded_buffer(input, &buffer->encoded);
737 if (hr != S_OK) {
738 free_encoded_buffer(input, &buffer->utf16);
739 readerinput_free(input, buffer);
740 return hr;
743 input->buffer = buffer;
744 return S_OK;
747 static void free_input_buffer(input_buffer *buffer)
749 free_encoded_buffer(buffer->input, &buffer->encoded);
750 free_encoded_buffer(buffer->input, &buffer->utf16);
751 readerinput_free(buffer->input, buffer);
754 static void readerinput_release_stream(xmlreaderinput *readerinput)
756 if (readerinput->stream) {
757 ISequentialStream_Release(readerinput->stream);
758 readerinput->stream = NULL;
762 /* Queries already stored interface for IStream/ISequentialStream.
763 Interface supplied on creation will be overwritten */
764 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
766 HRESULT hr;
768 readerinput_release_stream(readerinput);
769 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
770 if (hr != S_OK)
771 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
773 return hr;
776 /* reads a chunk to raw buffer */
777 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
779 encoded_buffer *buffer = &readerinput->buffer->encoded;
780 /* to make sure aligned length won't exceed allocated length */
781 ULONG len = buffer->allocated - buffer->written - 4;
782 ULONG read;
783 HRESULT hr;
785 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
786 variable width encodings like UTF-8 */
787 len = (len + 3) & ~3;
788 /* try to use allocated space or grow */
789 if (buffer->allocated - buffer->written < len)
791 buffer->allocated *= 2;
792 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
793 len = buffer->allocated - buffer->written;
796 read = 0;
797 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
798 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
799 readerinput->pending = hr == E_PENDING;
800 if (FAILED(hr)) return hr;
801 buffer->written += read;
803 return hr;
806 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
807 static void readerinput_grow(xmlreaderinput *readerinput, int length)
809 encoded_buffer *buffer = &readerinput->buffer->utf16;
811 length *= sizeof(WCHAR);
812 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
813 if (buffer->allocated < buffer->written + length + 4)
815 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
816 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
817 buffer->allocated = grown_size;
821 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
823 static const char startA[] = {'<','?'};
824 static const char commentA[] = {'<','!'};
825 encoded_buffer *buffer = &readerinput->buffer->encoded;
826 unsigned char *ptr = (unsigned char*)buffer->data;
828 return !memcmp(buffer->data, startA, sizeof(startA)) ||
829 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
830 /* test start byte */
831 (ptr[0] == '<' &&
833 (ptr[1] && (ptr[1] <= 0x7f)) ||
834 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
835 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
836 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
840 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
842 encoded_buffer *buffer = &readerinput->buffer->encoded;
843 static const WCHAR startW[] = {'<','?'};
844 static const WCHAR commentW[] = {'<','!'};
845 static const char utf8bom[] = {0xef,0xbb,0xbf};
846 static const char utf16lebom[] = {0xff,0xfe};
848 *enc = XmlEncoding_Unknown;
850 if (buffer->written <= 3)
852 HRESULT hr = readerinput_growraw(readerinput);
853 if (FAILED(hr)) return hr;
854 if (buffer->written <= 3) return MX_E_INPUTEND;
857 /* try start symbols if we have enough data to do that, input buffer should contain
858 first chunk already */
859 if (readerinput_is_utf8(readerinput))
860 *enc = XmlEncoding_UTF8;
861 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
862 !memcmp(buffer->data, commentW, sizeof(commentW)))
863 *enc = XmlEncoding_UTF16;
864 /* try with BOM now */
865 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
867 buffer->cur += sizeof(utf8bom);
868 *enc = XmlEncoding_UTF8;
870 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
872 buffer->cur += sizeof(utf16lebom);
873 *enc = XmlEncoding_UTF16;
876 return S_OK;
879 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
881 encoded_buffer *buffer = &readerinput->buffer->encoded;
882 int len = buffer->written;
884 /* complete single byte char */
885 if (!(buffer->data[len-1] & 0x80)) return len;
887 /* find start byte of multibyte char */
888 while (--len && !(buffer->data[len] & 0xc0))
891 return len;
894 /* Returns byte length of complete char sequence for buffer code page,
895 it's relative to current buffer position which is currently used for BOM handling
896 only. */
897 static int readerinput_get_convlen(xmlreaderinput *readerinput)
899 encoded_buffer *buffer = &readerinput->buffer->encoded;
900 int len;
902 if (readerinput->buffer->code_page == CP_UTF8)
903 len = readerinput_get_utf8_convlen(readerinput);
904 else
905 len = buffer->written;
907 TRACE("%d\n", len - buffer->cur);
908 return len - buffer->cur;
911 /* It's possible that raw buffer has some leftovers from last conversion - some char
912 sequence that doesn't represent a full code point. Length argument should be calculated with
913 readerinput_get_convlen(), if it's -1 it will be calculated here. */
914 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
916 encoded_buffer *buffer = &readerinput->buffer->encoded;
918 if (len == -1)
919 len = readerinput_get_convlen(readerinput);
921 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
922 /* everything below cur is lost too */
923 buffer->written -= len + buffer->cur;
924 /* after this point we don't need cur offset really,
925 it's used only to mark where actual data begins when first chunk is read */
926 buffer->cur = 0;
929 /* note that raw buffer content is kept */
930 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
932 encoded_buffer *src = &readerinput->buffer->encoded;
933 encoded_buffer *dest = &readerinput->buffer->utf16;
934 int len, dest_len;
935 HRESULT hr;
936 WCHAR *ptr;
937 UINT cp;
939 hr = get_code_page(enc, &cp);
940 if (FAILED(hr)) return;
942 readerinput->buffer->code_page = cp;
943 len = readerinput_get_convlen(readerinput);
945 TRACE("switching to cp %d\n", cp);
947 /* just copy in this case */
948 if (enc == XmlEncoding_UTF16)
950 readerinput_grow(readerinput, len);
951 memcpy(dest->data, src->data + src->cur, len);
952 dest->written += len*sizeof(WCHAR);
953 return;
956 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
957 readerinput_grow(readerinput, dest_len);
958 ptr = (WCHAR*)dest->data;
959 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
960 ptr[dest_len] = 0;
961 dest->written += dest_len*sizeof(WCHAR);
964 /* shrinks parsed data a buffer begins with */
965 static void reader_shrink(xmlreader *reader)
967 encoded_buffer *buffer = &reader->input->buffer->utf16;
969 /* avoid to move too often using threshold shrink length */
970 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
972 buffer->written -= buffer->cur*sizeof(WCHAR);
973 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
974 buffer->cur = 0;
975 *(WCHAR*)&buffer->data[buffer->written] = 0;
979 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
980 It won't attempt to shrink but will grow destination buffer if needed */
981 static HRESULT reader_more(xmlreader *reader)
983 xmlreaderinput *readerinput = reader->input;
984 encoded_buffer *src = &readerinput->buffer->encoded;
985 encoded_buffer *dest = &readerinput->buffer->utf16;
986 UINT cp = readerinput->buffer->code_page;
987 int len, dest_len;
988 HRESULT hr;
989 WCHAR *ptr;
991 /* get some raw data from stream first */
992 hr = readerinput_growraw(readerinput);
993 len = readerinput_get_convlen(readerinput);
995 /* just copy for UTF-16 case */
996 if (cp == ~0)
998 readerinput_grow(readerinput, len);
999 memcpy(dest->data + dest->written, src->data + src->cur, len);
1000 dest->written += len*sizeof(WCHAR);
1001 return hr;
1004 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1005 readerinput_grow(readerinput, dest_len);
1006 ptr = (WCHAR*)(dest->data + dest->written);
1007 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1008 ptr[dest_len] = 0;
1009 dest->written += dest_len*sizeof(WCHAR);
1010 /* get rid of processed data */
1011 readerinput_shrinkraw(readerinput, len);
1013 return hr;
1016 static inline UINT reader_get_cur(xmlreader *reader)
1018 return reader->input->buffer->utf16.cur;
1021 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1023 encoded_buffer *buffer = &reader->input->buffer->utf16;
1024 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1025 if (!*ptr) reader_more(reader);
1026 return (WCHAR*)buffer->data + buffer->cur;
1029 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1031 int i=0;
1032 const WCHAR *ptr = reader_get_ptr(reader);
1033 while (str[i])
1035 if (!ptr[i])
1037 reader_more(reader);
1038 ptr = reader_get_ptr(reader);
1040 if (str[i] != ptr[i])
1041 return ptr[i] - str[i];
1042 i++;
1044 return 0;
1047 /* moves cursor n WCHARs forward */
1048 static void reader_skipn(xmlreader *reader, int n)
1050 encoded_buffer *buffer = &reader->input->buffer->utf16;
1051 const WCHAR *ptr = reader_get_ptr(reader);
1053 while (*ptr++ && n--)
1055 buffer->cur++;
1056 reader->pos++;
1060 static inline BOOL is_wchar_space(WCHAR ch)
1062 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1065 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1066 static int reader_skipspaces(xmlreader *reader)
1068 encoded_buffer *buffer = &reader->input->buffer->utf16;
1069 const WCHAR *ptr = reader_get_ptr(reader);
1070 UINT start = reader_get_cur(reader);
1072 while (is_wchar_space(*ptr))
1074 if (*ptr == '\r')
1075 reader->pos = 0;
1076 else if (*ptr == '\n')
1078 reader->line++;
1079 reader->pos = 0;
1081 else
1082 reader->pos++;
1084 buffer->cur++;
1085 ptr = reader_get_ptr(reader);
1088 return reader_get_cur(reader) - start;
1091 /* [26] VersionNum ::= '1.' [0-9]+ */
1092 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1094 static const WCHAR onedotW[] = {'1','.',0};
1095 WCHAR *ptr, *ptr2;
1096 UINT start;
1098 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1100 start = reader_get_cur(reader);
1101 /* skip "1." */
1102 reader_skipn(reader, 2);
1104 ptr2 = ptr = reader_get_ptr(reader);
1105 while (*ptr >= '0' && *ptr <= '9')
1107 reader_skipn(reader, 1);
1108 ptr = reader_get_ptr(reader);
1111 if (ptr2 == ptr) return WC_E_DIGIT;
1112 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1113 TRACE("version=%s\n", debug_strval(reader, val));
1114 return S_OK;
1117 /* [25] Eq ::= S? '=' S? */
1118 static HRESULT reader_parse_eq(xmlreader *reader)
1120 static const WCHAR eqW[] = {'=',0};
1121 reader_skipspaces(reader);
1122 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1123 /* skip '=' */
1124 reader_skipn(reader, 1);
1125 reader_skipspaces(reader);
1126 return S_OK;
1129 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1130 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1132 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1133 strval val, name;
1134 HRESULT hr;
1136 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1138 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1139 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1140 /* skip 'version' */
1141 reader_skipn(reader, 7);
1143 hr = reader_parse_eq(reader);
1144 if (FAILED(hr)) return hr;
1146 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1147 return WC_E_QUOTE;
1148 /* skip "'"|'"' */
1149 reader_skipn(reader, 1);
1151 hr = reader_parse_versionnum(reader, &val);
1152 if (FAILED(hr)) return hr;
1154 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1155 return WC_E_QUOTE;
1157 /* skip "'"|'"' */
1158 reader_skipn(reader, 1);
1160 return reader_add_attr(reader, NULL, &name, &val);
1163 /* ([A-Za-z0-9._] | '-') */
1164 static inline BOOL is_wchar_encname(WCHAR ch)
1166 return ((ch >= 'A' && ch <= 'Z') ||
1167 (ch >= 'a' && ch <= 'z') ||
1168 (ch >= '0' && ch <= '9') ||
1169 (ch == '.') || (ch == '_') ||
1170 (ch == '-'));
1173 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1174 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1176 WCHAR *start = reader_get_ptr(reader), *ptr;
1177 xml_encoding enc;
1178 int len;
1180 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1181 return WC_E_ENCNAME;
1183 val->start = reader_get_cur(reader);
1185 ptr = start;
1186 while (is_wchar_encname(*++ptr))
1189 len = ptr - start;
1190 enc = parse_encoding_name(start, len);
1191 TRACE("encoding name %s\n", debugstr_wn(start, len));
1192 val->str = start;
1193 val->len = len;
1195 if (enc == XmlEncoding_Unknown)
1196 return WC_E_ENCNAME;
1198 /* skip encoding name */
1199 reader_skipn(reader, len);
1200 return S_OK;
1203 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1204 static HRESULT reader_parse_encdecl(xmlreader *reader)
1206 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1207 strval name, val;
1208 HRESULT hr;
1210 if (!reader_skipspaces(reader)) return S_FALSE;
1212 if (reader_cmp(reader, encodingW)) return S_FALSE;
1213 name.str = reader_get_ptr(reader);
1214 name.start = reader_get_cur(reader);
1215 name.len = 8;
1216 /* skip 'encoding' */
1217 reader_skipn(reader, 8);
1219 hr = reader_parse_eq(reader);
1220 if (FAILED(hr)) return hr;
1222 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1223 return WC_E_QUOTE;
1224 /* skip "'"|'"' */
1225 reader_skipn(reader, 1);
1227 hr = reader_parse_encname(reader, &val);
1228 if (FAILED(hr)) return hr;
1230 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1231 return WC_E_QUOTE;
1233 /* skip "'"|'"' */
1234 reader_skipn(reader, 1);
1236 return reader_add_attr(reader, NULL, &name, &val);
1239 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1240 static HRESULT reader_parse_sddecl(xmlreader *reader)
1242 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1243 static const WCHAR yesW[] = {'y','e','s',0};
1244 static const WCHAR noW[] = {'n','o',0};
1245 strval name, val;
1246 UINT start;
1247 HRESULT hr;
1249 if (!reader_skipspaces(reader)) return S_FALSE;
1251 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1252 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1253 /* skip 'standalone' */
1254 reader_skipn(reader, 10);
1256 hr = reader_parse_eq(reader);
1257 if (FAILED(hr)) return hr;
1259 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1260 return WC_E_QUOTE;
1261 /* skip "'"|'"' */
1262 reader_skipn(reader, 1);
1264 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1265 return WC_E_XMLDECL;
1267 start = reader_get_cur(reader);
1268 /* skip 'yes'|'no' */
1269 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1270 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1271 TRACE("standalone=%s\n", debug_strval(reader, &val));
1273 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1274 return WC_E_QUOTE;
1275 /* skip "'"|'"' */
1276 reader_skipn(reader, 1);
1278 return reader_add_attr(reader, NULL, &name, &val);
1281 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1282 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1284 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1285 static const WCHAR declcloseW[] = {'?','>',0};
1286 HRESULT hr;
1288 /* check if we have "<?xml " */
1289 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1291 reader_skipn(reader, 5);
1292 hr = reader_parse_versioninfo(reader);
1293 if (FAILED(hr))
1294 return hr;
1296 hr = reader_parse_encdecl(reader);
1297 if (FAILED(hr))
1298 return hr;
1300 hr = reader_parse_sddecl(reader);
1301 if (FAILED(hr))
1302 return hr;
1304 reader_skipspaces(reader);
1305 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1306 reader_skipn(reader, 2);
1308 reader_inc_depth(reader);
1309 reader->nodetype = XmlNodeType_XmlDeclaration;
1310 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1311 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1312 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1314 return S_OK;
1317 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1318 static HRESULT reader_parse_comment(xmlreader *reader)
1320 WCHAR *ptr;
1321 UINT start;
1323 if (reader->resumestate == XmlReadResumeState_Comment)
1325 start = reader->resume[XmlReadResume_Body];
1326 ptr = reader_get_ptr(reader);
1328 else
1330 /* skip '<!--' */
1331 reader_skipn(reader, 4);
1332 reader_shrink(reader);
1333 ptr = reader_get_ptr(reader);
1334 start = reader_get_cur(reader);
1335 reader->nodetype = XmlNodeType_Comment;
1336 reader->resume[XmlReadResume_Body] = start;
1337 reader->resumestate = XmlReadResumeState_Comment;
1338 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1339 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1340 reader_set_strvalue(reader, StringValue_Value, NULL);
1343 /* will exit when there's no more data, it won't attempt to
1344 read more from stream */
1345 while (*ptr)
1347 if (ptr[0] == '-')
1349 if (ptr[1] == '-')
1351 if (ptr[2] == '>')
1353 strval value;
1355 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1356 TRACE("%s\n", debug_strval(reader, &value));
1358 /* skip rest of markup '->' */
1359 reader_skipn(reader, 3);
1361 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1362 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1363 reader_set_strvalue(reader, StringValue_Value, &value);
1364 reader->resume[XmlReadResume_Body] = 0;
1365 reader->resumestate = XmlReadResumeState_Initial;
1366 return S_OK;
1368 else
1369 return WC_E_COMMENT;
1373 reader_skipn(reader, 1);
1374 ptr++;
1377 return S_OK;
1380 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1381 static inline BOOL is_char(WCHAR ch)
1383 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1384 (ch >= 0x20 && ch <= 0xd7ff) ||
1385 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1386 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1387 (ch >= 0xe000 && ch <= 0xfffd);
1390 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1391 static inline BOOL is_pubchar(WCHAR ch)
1393 return (ch == ' ') ||
1394 (ch >= 'a' && ch <= 'z') ||
1395 (ch >= 'A' && ch <= 'Z') ||
1396 (ch >= '0' && ch <= '9') ||
1397 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1398 (ch == '=') || (ch == '?') ||
1399 (ch == '@') || (ch == '!') ||
1400 (ch >= '#' && ch <= '%') || /* #$% */
1401 (ch == '_') || (ch == '\r') || (ch == '\n');
1404 static inline BOOL is_namestartchar(WCHAR ch)
1406 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1407 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1408 (ch >= 0xc0 && ch <= 0xd6) ||
1409 (ch >= 0xd8 && ch <= 0xf6) ||
1410 (ch >= 0xf8 && ch <= 0x2ff) ||
1411 (ch >= 0x370 && ch <= 0x37d) ||
1412 (ch >= 0x37f && ch <= 0x1fff) ||
1413 (ch >= 0x200c && ch <= 0x200d) ||
1414 (ch >= 0x2070 && ch <= 0x218f) ||
1415 (ch >= 0x2c00 && ch <= 0x2fef) ||
1416 (ch >= 0x3001 && ch <= 0xd7ff) ||
1417 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1418 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1419 (ch >= 0xf900 && ch <= 0xfdcf) ||
1420 (ch >= 0xfdf0 && ch <= 0xfffd);
1423 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1424 static inline BOOL is_ncnamechar(WCHAR ch)
1426 return (ch >= 'A' && ch <= 'Z') ||
1427 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1428 (ch == '-') || (ch == '.') ||
1429 (ch >= '0' && ch <= '9') ||
1430 (ch == 0xb7) ||
1431 (ch >= 0xc0 && ch <= 0xd6) ||
1432 (ch >= 0xd8 && ch <= 0xf6) ||
1433 (ch >= 0xf8 && ch <= 0x2ff) ||
1434 (ch >= 0x300 && ch <= 0x36f) ||
1435 (ch >= 0x370 && ch <= 0x37d) ||
1436 (ch >= 0x37f && ch <= 0x1fff) ||
1437 (ch >= 0x200c && ch <= 0x200d) ||
1438 (ch >= 0x203f && ch <= 0x2040) ||
1439 (ch >= 0x2070 && ch <= 0x218f) ||
1440 (ch >= 0x2c00 && ch <= 0x2fef) ||
1441 (ch >= 0x3001 && ch <= 0xd7ff) ||
1442 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1443 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1444 (ch >= 0xf900 && ch <= 0xfdcf) ||
1445 (ch >= 0xfdf0 && ch <= 0xfffd);
1448 static inline BOOL is_namechar(WCHAR ch)
1450 return (ch == ':') || is_ncnamechar(ch);
1453 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1455 /* When we're on attribute always return attribute type, container node type is kept.
1456 Note that container is not necessarily an element, and attribute doesn't mean it's
1457 an attribute in XML spec terms. */
1458 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1461 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1462 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1463 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1464 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1465 [5] Name ::= NameStartChar (NameChar)* */
1466 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1468 WCHAR *ptr;
1469 UINT start;
1471 if (reader->resume[XmlReadResume_Name])
1473 start = reader->resume[XmlReadResume_Name];
1474 ptr = reader_get_ptr(reader);
1476 else
1478 ptr = reader_get_ptr(reader);
1479 start = reader_get_cur(reader);
1480 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1483 while (is_namechar(*ptr))
1485 reader_skipn(reader, 1);
1486 ptr = reader_get_ptr(reader);
1489 if (is_reader_pending(reader))
1491 reader->resume[XmlReadResume_Name] = start;
1492 return E_PENDING;
1494 else
1495 reader->resume[XmlReadResume_Name] = 0;
1497 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1498 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1500 return S_OK;
1503 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1504 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1506 static const WCHAR xmlW[] = {'x','m','l'};
1507 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1508 strval name;
1509 WCHAR *ptr;
1510 HRESULT hr;
1511 UINT i;
1513 hr = reader_parse_name(reader, &name);
1514 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1516 /* now that we got name check for illegal content */
1517 if (strval_eq(reader, &name, &xmlval))
1518 return WC_E_LEADINGXML;
1520 /* PITarget can't be a qualified name */
1521 ptr = reader_get_strptr(reader, &name);
1522 for (i = 0; i < name.len; i++)
1523 if (ptr[i] == ':')
1524 return i ? NC_E_NAMECOLON : WC_E_PI;
1526 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1527 *target = name;
1528 return S_OK;
1531 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1532 static HRESULT reader_parse_pi(xmlreader *reader)
1534 strval target;
1535 WCHAR *ptr;
1536 UINT start;
1537 HRESULT hr;
1539 switch (reader->resumestate)
1541 case XmlReadResumeState_Initial:
1542 /* skip '<?' */
1543 reader_skipn(reader, 2);
1544 reader_shrink(reader);
1545 reader->resumestate = XmlReadResumeState_PITarget;
1546 case XmlReadResumeState_PITarget:
1547 hr = reader_parse_pitarget(reader, &target);
1548 if (FAILED(hr)) return hr;
1549 reader_set_strvalue(reader, StringValue_LocalName, &target);
1550 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1551 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1552 reader->resumestate = XmlReadResumeState_PIBody;
1553 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1554 default:
1558 start = reader->resume[XmlReadResume_Body];
1559 ptr = reader_get_ptr(reader);
1560 while (*ptr)
1562 if (ptr[0] == '?')
1564 if (ptr[1] == '>')
1566 UINT cur = reader_get_cur(reader);
1567 strval value;
1569 /* strip all leading whitespace chars */
1570 while (start < cur)
1572 ptr = reader_get_ptr2(reader, start);
1573 if (!is_wchar_space(*ptr)) break;
1574 start++;
1577 reader_init_strvalue(start, cur-start, &value);
1579 /* skip '?>' */
1580 reader_skipn(reader, 2);
1581 TRACE("%s\n", debug_strval(reader, &value));
1582 reader->nodetype = XmlNodeType_ProcessingInstruction;
1583 reader->resumestate = XmlReadResumeState_Initial;
1584 reader->resume[XmlReadResume_Body] = 0;
1585 reader_set_strvalue(reader, StringValue_Value, &value);
1586 return S_OK;
1590 reader_skipn(reader, 1);
1591 ptr = reader_get_ptr(reader);
1594 return S_OK;
1597 /* This one is used to parse significant whitespace nodes, like in Misc production */
1598 static HRESULT reader_parse_whitespace(xmlreader *reader)
1600 switch (reader->resumestate)
1602 case XmlReadResumeState_Initial:
1603 reader_shrink(reader);
1604 reader->resumestate = XmlReadResumeState_Whitespace;
1605 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1606 reader->nodetype = XmlNodeType_Whitespace;
1607 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1608 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1609 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1610 /* fallthrough */
1611 case XmlReadResumeState_Whitespace:
1613 strval value;
1614 UINT start;
1616 reader_skipspaces(reader);
1617 if (is_reader_pending(reader)) return S_OK;
1619 start = reader->resume[XmlReadResume_Body];
1620 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1621 reader_set_strvalue(reader, StringValue_Value, &value);
1622 TRACE("%s\n", debug_strval(reader, &value));
1623 reader->resumestate = XmlReadResumeState_Initial;
1625 default:
1629 return S_OK;
1632 /* [27] Misc ::= Comment | PI | S */
1633 static HRESULT reader_parse_misc(xmlreader *reader)
1635 HRESULT hr = S_FALSE;
1637 if (reader->resumestate != XmlReadResumeState_Initial)
1639 hr = reader_more(reader);
1640 if (FAILED(hr)) return hr;
1642 /* finish current node */
1643 switch (reader->resumestate)
1645 case XmlReadResumeState_PITarget:
1646 case XmlReadResumeState_PIBody:
1647 return reader_parse_pi(reader);
1648 case XmlReadResumeState_Comment:
1649 return reader_parse_comment(reader);
1650 case XmlReadResumeState_Whitespace:
1651 return reader_parse_whitespace(reader);
1652 default:
1653 ERR("unknown resume state %d\n", reader->resumestate);
1657 while (1)
1659 const WCHAR *cur = reader_get_ptr(reader);
1661 if (is_wchar_space(*cur))
1662 hr = reader_parse_whitespace(reader);
1663 else if (!reader_cmp(reader, commentW))
1664 hr = reader_parse_comment(reader);
1665 else if (!reader_cmp(reader, piW))
1666 hr = reader_parse_pi(reader);
1667 else
1668 break;
1670 if (hr != S_FALSE) return hr;
1673 return hr;
1676 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1677 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1679 WCHAR *cur = reader_get_ptr(reader), quote;
1680 UINT start;
1682 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1684 quote = *cur;
1685 reader_skipn(reader, 1);
1687 cur = reader_get_ptr(reader);
1688 start = reader_get_cur(reader);
1689 while (is_char(*cur) && *cur != quote)
1691 reader_skipn(reader, 1);
1692 cur = reader_get_ptr(reader);
1694 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1695 if (*cur == quote) reader_skipn(reader, 1);
1697 TRACE("%s\n", debug_strval(reader, literal));
1698 return S_OK;
1701 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1702 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1703 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1705 WCHAR *cur = reader_get_ptr(reader), quote;
1706 UINT start;
1708 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1710 quote = *cur;
1711 reader_skipn(reader, 1);
1713 start = reader_get_cur(reader);
1714 cur = reader_get_ptr(reader);
1715 while (is_pubchar(*cur) && *cur != quote)
1717 reader_skipn(reader, 1);
1718 cur = reader_get_ptr(reader);
1720 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1721 if (*cur == quote) reader_skipn(reader, 1);
1723 TRACE("%s\n", debug_strval(reader, literal));
1724 return S_OK;
1727 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1728 static HRESULT reader_parse_externalid(xmlreader *reader)
1730 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1731 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1732 strval name, sys;
1733 HRESULT hr;
1734 int cnt;
1736 if (!reader_cmp(reader, publicW)) {
1737 strval pub;
1739 /* public id */
1740 reader_skipn(reader, 6);
1741 cnt = reader_skipspaces(reader);
1742 if (!cnt) return WC_E_WHITESPACE;
1744 hr = reader_parse_pub_literal(reader, &pub);
1745 if (FAILED(hr)) return hr;
1747 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1748 hr = reader_add_attr(reader, NULL, &name, &pub);
1749 if (FAILED(hr)) return hr;
1751 cnt = reader_skipspaces(reader);
1752 if (!cnt) return S_OK;
1754 /* optional system id */
1755 hr = reader_parse_sys_literal(reader, &sys);
1756 if (FAILED(hr)) return S_OK;
1758 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1759 hr = reader_add_attr(reader, NULL, &name, &sys);
1760 if (FAILED(hr)) return hr;
1762 return S_OK;
1763 } else if (!reader_cmp(reader, systemW)) {
1764 /* system id */
1765 reader_skipn(reader, 6);
1766 cnt = reader_skipspaces(reader);
1767 if (!cnt) return WC_E_WHITESPACE;
1769 hr = reader_parse_sys_literal(reader, &sys);
1770 if (FAILED(hr)) return hr;
1772 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1773 return reader_add_attr(reader, NULL, &name, &sys);
1776 return S_FALSE;
1779 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1780 static HRESULT reader_parse_dtd(xmlreader *reader)
1782 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1783 strval name;
1784 WCHAR *cur;
1785 HRESULT hr;
1787 /* check if we have "<!DOCTYPE" */
1788 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1789 reader_shrink(reader);
1791 /* DTD processing is not allowed by default */
1792 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1794 reader_skipn(reader, 9);
1795 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1797 /* name */
1798 hr = reader_parse_name(reader, &name);
1799 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1801 reader_skipspaces(reader);
1803 hr = reader_parse_externalid(reader);
1804 if (FAILED(hr)) return hr;
1806 reader_skipspaces(reader);
1808 cur = reader_get_ptr(reader);
1809 if (*cur != '>')
1811 FIXME("internal subset parsing not implemented\n");
1812 return E_NOTIMPL;
1815 /* skip '>' */
1816 reader_skipn(reader, 1);
1818 reader->nodetype = XmlNodeType_DocumentType;
1819 reader_set_strvalue(reader, StringValue_LocalName, &name);
1820 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1822 return S_OK;
1825 /* [11 NS] LocalPart ::= NCName */
1826 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1828 WCHAR *ptr;
1829 UINT start;
1831 if (reader->resume[XmlReadResume_Local])
1833 start = reader->resume[XmlReadResume_Local];
1834 ptr = reader_get_ptr(reader);
1836 else
1838 ptr = reader_get_ptr(reader);
1839 start = reader_get_cur(reader);
1842 while (is_ncnamechar(*ptr))
1844 reader_skipn(reader, 1);
1845 ptr = reader_get_ptr(reader);
1848 if (is_reader_pending(reader))
1850 reader->resume[XmlReadResume_Local] = start;
1851 return E_PENDING;
1853 else
1854 reader->resume[XmlReadResume_Local] = 0;
1856 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1858 return S_OK;
1861 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1862 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1863 [9 NS] UnprefixedName ::= LocalPart
1864 [10 NS] Prefix ::= NCName */
1865 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1867 WCHAR *ptr;
1868 UINT start;
1869 HRESULT hr;
1871 if (reader->resume[XmlReadResume_Name])
1873 start = reader->resume[XmlReadResume_Name];
1874 ptr = reader_get_ptr(reader);
1876 else
1878 ptr = reader_get_ptr(reader);
1879 start = reader_get_cur(reader);
1880 reader->resume[XmlReadResume_Name] = start;
1881 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1884 if (reader->resume[XmlReadResume_Local])
1886 hr = reader_parse_local(reader, local);
1887 if (FAILED(hr)) return hr;
1889 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1890 local->start - reader->resume[XmlReadResume_Name] - 1,
1891 prefix);
1893 else
1895 /* skip prefix part */
1896 while (is_ncnamechar(*ptr))
1898 reader_skipn(reader, 1);
1899 ptr = reader_get_ptr(reader);
1902 if (is_reader_pending(reader)) return E_PENDING;
1904 /* got a qualified name */
1905 if (*ptr == ':')
1907 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1909 /* skip ':' */
1910 reader_skipn(reader, 1);
1911 hr = reader_parse_local(reader, local);
1912 if (FAILED(hr)) return hr;
1914 else
1916 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1917 reader_init_strvalue(0, 0, prefix);
1921 if (prefix->len)
1922 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1923 else
1924 TRACE("ncname %s\n", debug_strval(reader, local));
1926 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1927 /* count ':' too */
1928 (prefix->len ? prefix->len + 1 : 0) + local->len,
1929 qname);
1931 reader->resume[XmlReadResume_Name] = 0;
1932 reader->resume[XmlReadResume_Local] = 0;
1934 return S_OK;
1937 /* Applies normalization rules to a single char, used for attribute values.
1939 Rules include 2 steps:
1941 1) replacing \r\n with a single \n;
1942 2) replacing all whitespace chars with ' '.
1945 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1947 encoded_buffer *buffer = &reader->input->buffer->utf16;
1949 if (!is_wchar_space(*ptr)) return;
1951 if (*ptr == '\r' && *(ptr+1) == '\n')
1953 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1954 memmove(ptr+1, ptr+2, len);
1956 *ptr = ' ';
1959 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1961 static const WCHAR entltW[] = {'l','t'};
1962 static const WCHAR entgtW[] = {'g','t'};
1963 static const WCHAR entampW[] = {'a','m','p'};
1964 static const WCHAR entaposW[] = {'a','p','o','s'};
1965 static const WCHAR entquotW[] = {'q','u','o','t'};
1966 static const strval lt = { (WCHAR*)entltW, 2 };
1967 static const strval gt = { (WCHAR*)entgtW, 2 };
1968 static const strval amp = { (WCHAR*)entampW, 3 };
1969 static const strval apos = { (WCHAR*)entaposW, 4 };
1970 static const strval quot = { (WCHAR*)entquotW, 4 };
1971 WCHAR *str = reader_get_strptr(reader, name);
1973 switch (*str)
1975 case 'l':
1976 if (strval_eq(reader, name, &lt)) return '<';
1977 break;
1978 case 'g':
1979 if (strval_eq(reader, name, &gt)) return '>';
1980 break;
1981 case 'a':
1982 if (strval_eq(reader, name, &amp))
1983 return '&';
1984 else if (strval_eq(reader, name, &apos))
1985 return '\'';
1986 break;
1987 case 'q':
1988 if (strval_eq(reader, name, &quot)) return '\"';
1989 break;
1990 default:
1994 return 0;
1997 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1998 [67] Reference ::= EntityRef | CharRef
1999 [68] EntityRef ::= '&' Name ';' */
2000 static HRESULT reader_parse_reference(xmlreader *reader)
2002 encoded_buffer *buffer = &reader->input->buffer->utf16;
2003 WCHAR *start = reader_get_ptr(reader), *ptr;
2004 UINT cur = reader_get_cur(reader);
2005 WCHAR ch = 0;
2006 int len;
2008 /* skip '&' */
2009 reader_skipn(reader, 1);
2010 ptr = reader_get_ptr(reader);
2012 if (*ptr == '#')
2014 reader_skipn(reader, 1);
2015 ptr = reader_get_ptr(reader);
2017 /* hex char or decimal */
2018 if (*ptr == 'x')
2020 reader_skipn(reader, 1);
2021 ptr = reader_get_ptr(reader);
2023 while (*ptr != ';')
2025 if ((*ptr >= '0' && *ptr <= '9'))
2026 ch = ch*16 + *ptr - '0';
2027 else if ((*ptr >= 'a' && *ptr <= 'f'))
2028 ch = ch*16 + *ptr - 'a' + 10;
2029 else if ((*ptr >= 'A' && *ptr <= 'F'))
2030 ch = ch*16 + *ptr - 'A' + 10;
2031 else
2032 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2033 reader_skipn(reader, 1);
2034 ptr = reader_get_ptr(reader);
2037 else
2039 while (*ptr != ';')
2041 if ((*ptr >= '0' && *ptr <= '9'))
2043 ch = ch*10 + *ptr - '0';
2044 reader_skipn(reader, 1);
2045 ptr = reader_get_ptr(reader);
2047 else
2048 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2052 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2054 /* normalize */
2055 if (is_wchar_space(ch)) ch = ' ';
2057 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2058 memmove(start+1, ptr+1, len);
2059 buffer->cur = cur + 1;
2061 *start = ch;
2063 else
2065 strval name;
2066 HRESULT hr;
2068 hr = reader_parse_name(reader, &name);
2069 if (FAILED(hr)) return hr;
2071 ptr = reader_get_ptr(reader);
2072 if (*ptr != ';') return WC_E_SEMICOLON;
2074 /* predefined entities resolve to a single character */
2075 ch = get_predefined_entity(reader, &name);
2076 if (ch)
2078 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2079 memmove(start+1, ptr+1, len);
2080 buffer->cur = cur + 1;
2082 *start = ch;
2084 else
2086 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2087 return WC_E_UNDECLAREDENTITY;
2092 return S_OK;
2095 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2096 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2098 WCHAR *ptr, quote;
2099 UINT start;
2101 ptr = reader_get_ptr(reader);
2103 /* skip opening quote */
2104 quote = *ptr;
2105 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2106 reader_skipn(reader, 1);
2108 ptr = reader_get_ptr(reader);
2109 start = reader_get_cur(reader);
2110 while (*ptr)
2112 if (*ptr == '<') return WC_E_LESSTHAN;
2114 if (*ptr == quote)
2116 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2117 /* skip closing quote */
2118 reader_skipn(reader, 1);
2119 return S_OK;
2122 if (*ptr == '&')
2124 HRESULT hr = reader_parse_reference(reader);
2125 if (FAILED(hr)) return hr;
2127 else
2129 reader_normalize_space(reader, ptr);
2130 reader_skipn(reader, 1);
2132 ptr = reader_get_ptr(reader);
2135 return WC_E_QUOTE;
2138 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2139 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2140 [3 NS] DefaultAttName ::= 'xmlns'
2141 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2142 static HRESULT reader_parse_attribute(xmlreader *reader)
2144 strval prefix, local, qname, value;
2145 BOOL ns = FALSE, nsdef = FALSE;
2146 HRESULT hr;
2148 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2149 if (FAILED(hr)) return hr;
2151 if (strval_eq(reader, &prefix, &strval_xmlns))
2152 ns = TRUE;
2154 if (strval_eq(reader, &qname, &strval_xmlns))
2155 ns = nsdef = TRUE;
2157 hr = reader_parse_eq(reader);
2158 if (FAILED(hr)) return hr;
2160 hr = reader_parse_attvalue(reader, &value);
2161 if (FAILED(hr)) return hr;
2163 if (ns)
2164 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2166 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2167 return reader_add_attr(reader, &prefix, &local, &value);
2170 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2171 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2172 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2174 HRESULT hr;
2176 hr = reader_parse_qname(reader, prefix, local, qname);
2177 if (FAILED(hr)) return hr;
2179 while (1)
2181 static const WCHAR endW[] = {'/','>',0};
2183 reader_skipspaces(reader);
2185 /* empty element */
2186 if ((*empty = !reader_cmp(reader, endW)))
2188 /* skip '/>' */
2189 reader_skipn(reader, 2);
2190 reader->is_empty_element = TRUE;
2191 reader->empty_element.prefix = *prefix;
2192 reader->empty_element.localname = *local;
2193 reader->empty_element.qname = *qname;
2194 reader_mark_ns_nodes(reader, &reader->empty_element);
2195 return S_OK;
2198 /* got a start tag */
2199 if (!reader_cmp(reader, gtW))
2201 /* skip '>' */
2202 reader_skipn(reader, 1);
2203 return reader_push_element(reader, prefix, local, qname);
2206 hr = reader_parse_attribute(reader);
2207 if (FAILED(hr)) return hr;
2210 return S_OK;
2213 /* [39] element ::= EmptyElemTag | STag content ETag */
2214 static HRESULT reader_parse_element(xmlreader *reader)
2216 HRESULT hr;
2218 switch (reader->resumestate)
2220 case XmlReadResumeState_Initial:
2221 /* check if we are really on element */
2222 if (reader_cmp(reader, ltW)) return S_FALSE;
2224 /* skip '<' */
2225 reader_skipn(reader, 1);
2227 reader_shrink(reader);
2228 reader->resumestate = XmlReadResumeState_STag;
2229 case XmlReadResumeState_STag:
2231 strval qname, prefix, local;
2232 int empty = 0;
2234 /* this handles empty elements too */
2235 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2236 if (FAILED(hr)) return hr;
2238 /* FIXME: need to check for defined namespace to reject invalid prefix */
2240 /* if we got empty element and stack is empty go straight to Misc */
2241 if (empty && list_empty(&reader->elements))
2242 reader->instate = XmlReadInState_MiscEnd;
2243 else
2244 reader->instate = XmlReadInState_Content;
2246 reader->nodetype = XmlNodeType_Element;
2247 reader->resumestate = XmlReadResumeState_Initial;
2248 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2249 reader_set_strvalue(reader, StringValue_LocalName, &local);
2250 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2251 break;
2253 default:
2254 hr = E_FAIL;
2257 return hr;
2260 /* [13 NS] ETag ::= '</' QName S? '>' */
2261 static HRESULT reader_parse_endtag(xmlreader *reader)
2263 strval prefix, local, qname;
2264 struct element *elem;
2265 HRESULT hr;
2267 /* skip '</' */
2268 reader_skipn(reader, 2);
2270 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2271 if (FAILED(hr)) return hr;
2273 reader_skipspaces(reader);
2275 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2277 /* skip '>' */
2278 reader_skipn(reader, 1);
2280 /* Element stack should never be empty at this point, cause we shouldn't get to
2281 content parsing if it's empty. */
2282 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2283 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2285 reader->nodetype = XmlNodeType_EndElement;
2286 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2287 reader_set_strvalue(reader, StringValue_LocalName, &local);
2288 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2290 return S_OK;
2293 /* [18] CDSect ::= CDStart CData CDEnd
2294 [19] CDStart ::= '<![CDATA['
2295 [20] CData ::= (Char* - (Char* ']]>' Char*))
2296 [21] CDEnd ::= ']]>' */
2297 static HRESULT reader_parse_cdata(xmlreader *reader)
2299 WCHAR *ptr;
2300 UINT start;
2302 if (reader->resumestate == XmlReadResumeState_CDATA)
2304 start = reader->resume[XmlReadResume_Body];
2305 ptr = reader_get_ptr(reader);
2307 else
2309 /* skip markup '<![CDATA[' */
2310 reader_skipn(reader, 9);
2311 reader_shrink(reader);
2312 ptr = reader_get_ptr(reader);
2313 start = reader_get_cur(reader);
2314 reader->nodetype = XmlNodeType_CDATA;
2315 reader->resume[XmlReadResume_Body] = start;
2316 reader->resumestate = XmlReadResumeState_CDATA;
2317 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2318 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2319 reader_set_strvalue(reader, StringValue_Value, NULL);
2322 while (*ptr)
2324 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2326 strval value;
2328 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2330 /* skip ']]>' */
2331 reader_skipn(reader, 3);
2332 TRACE("%s\n", debug_strval(reader, &value));
2334 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2335 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2336 reader_set_strvalue(reader, StringValue_Value, &value);
2337 reader->resume[XmlReadResume_Body] = 0;
2338 reader->resumestate = XmlReadResumeState_Initial;
2339 return S_OK;
2341 else
2343 /* Value normalization is not fully implemented, rules are:
2345 - single '\r' -> '\n';
2346 - sequence '\r\n' -> '\n', in this case value length changes;
2348 if (*ptr == '\r') *ptr = '\n';
2349 reader_skipn(reader, 1);
2350 ptr++;
2354 return S_OK;
2357 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2358 static HRESULT reader_parse_chardata(xmlreader *reader)
2360 WCHAR *ptr;
2361 UINT start;
2363 if (reader->resumestate == XmlReadResumeState_CharData)
2365 start = reader->resume[XmlReadResume_Body];
2366 ptr = reader_get_ptr(reader);
2368 else
2370 reader_shrink(reader);
2371 ptr = reader_get_ptr(reader);
2372 start = reader_get_cur(reader);
2373 /* There's no text */
2374 if (!*ptr || *ptr == '<') return S_OK;
2375 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2376 reader->resume[XmlReadResume_Body] = start;
2377 reader->resumestate = XmlReadResumeState_CharData;
2378 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2379 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2380 reader_set_strvalue(reader, StringValue_Value, NULL);
2383 while (*ptr)
2385 /* CDATA closing sequence ']]>' is not allowed */
2386 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2387 return WC_E_CDSECTEND;
2389 /* Found next markup part */
2390 if (ptr[0] == '<')
2392 strval value;
2394 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2395 reader_set_strvalue(reader, StringValue_Value, &value);
2396 reader->resume[XmlReadResume_Body] = 0;
2397 reader->resumestate = XmlReadResumeState_Initial;
2398 return S_OK;
2401 reader_skipn(reader, 1);
2403 /* this covers a case when text has leading whitespace chars */
2404 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2405 ptr++;
2408 return S_OK;
2411 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2412 static HRESULT reader_parse_content(xmlreader *reader)
2414 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2415 static const WCHAR etagW[] = {'<','/',0};
2416 static const WCHAR ampW[] = {'&',0};
2418 if (reader->resumestate != XmlReadResumeState_Initial)
2420 switch (reader->resumestate)
2422 case XmlReadResumeState_CDATA:
2423 return reader_parse_cdata(reader);
2424 case XmlReadResumeState_Comment:
2425 return reader_parse_comment(reader);
2426 case XmlReadResumeState_PIBody:
2427 case XmlReadResumeState_PITarget:
2428 return reader_parse_pi(reader);
2429 case XmlReadResumeState_CharData:
2430 return reader_parse_chardata(reader);
2431 default:
2432 ERR("unknown resume state %d\n", reader->resumestate);
2436 reader_shrink(reader);
2438 /* handle end tag here, it indicates end of content as well */
2439 if (!reader_cmp(reader, etagW))
2440 return reader_parse_endtag(reader);
2442 if (!reader_cmp(reader, commentW))
2443 return reader_parse_comment(reader);
2445 if (!reader_cmp(reader, piW))
2446 return reader_parse_pi(reader);
2448 if (!reader_cmp(reader, cdstartW))
2449 return reader_parse_cdata(reader);
2451 if (!reader_cmp(reader, ampW))
2452 return reader_parse_reference(reader);
2454 if (!reader_cmp(reader, ltW))
2455 return reader_parse_element(reader);
2457 /* what's left must be CharData */
2458 return reader_parse_chardata(reader);
2461 static HRESULT reader_parse_nextnode(xmlreader *reader)
2463 XmlNodeType nodetype = reader_get_nodetype(reader);
2464 HRESULT hr;
2466 if (!is_reader_pending(reader))
2467 reader_clear_attrs(reader);
2469 /* When moving from EndElement or empty element, pop its own namespace definitions */
2470 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2471 reader_pop_ns_nodes(reader, &reader->empty_element);
2472 else if (nodetype == XmlNodeType_EndElement)
2473 reader_pop_element(reader);
2475 while (1)
2477 switch (reader->instate)
2479 /* if it's a first call for a new input we need to detect stream encoding */
2480 case XmlReadInState_Initial:
2482 xml_encoding enc;
2484 hr = readerinput_growraw(reader->input);
2485 if (FAILED(hr)) return hr;
2487 /* try to detect encoding by BOM or data and set input code page */
2488 hr = readerinput_detectencoding(reader->input, &enc);
2489 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2490 if (FAILED(hr)) return hr;
2492 /* always switch first time cause we have to put something in */
2493 readerinput_switchencoding(reader->input, enc);
2495 /* parse xml declaration */
2496 hr = reader_parse_xmldecl(reader);
2497 if (FAILED(hr)) return hr;
2499 readerinput_shrinkraw(reader->input, -1);
2500 reader->instate = XmlReadInState_Misc_DTD;
2501 if (hr == S_OK) return hr;
2503 break;
2504 case XmlReadInState_Misc_DTD:
2505 hr = reader_parse_misc(reader);
2506 if (FAILED(hr)) return hr;
2508 if (hr == S_FALSE)
2509 reader->instate = XmlReadInState_DTD;
2510 else
2511 return hr;
2512 break;
2513 case XmlReadInState_DTD:
2514 hr = reader_parse_dtd(reader);
2515 if (FAILED(hr)) return hr;
2517 if (hr == S_OK)
2519 reader->instate = XmlReadInState_DTD_Misc;
2520 return hr;
2522 else
2523 reader->instate = XmlReadInState_Element;
2524 break;
2525 case XmlReadInState_DTD_Misc:
2526 hr = reader_parse_misc(reader);
2527 if (FAILED(hr)) return hr;
2529 if (hr == S_FALSE)
2530 reader->instate = XmlReadInState_Element;
2531 else
2532 return hr;
2533 break;
2534 case XmlReadInState_Element:
2535 return reader_parse_element(reader);
2536 case XmlReadInState_Content:
2537 return reader_parse_content(reader);
2538 case XmlReadInState_MiscEnd:
2539 hr = reader_parse_misc(reader);
2540 if (FAILED(hr)) return hr;
2542 if (hr == S_FALSE)
2543 reader->instate = XmlReadInState_Eof;
2544 return hr;
2545 case XmlReadInState_Eof:
2546 return S_FALSE;
2547 default:
2548 FIXME("internal state %d not handled\n", reader->instate);
2549 return E_NOTIMPL;
2553 return E_NOTIMPL;
2556 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2558 xmlreader *This = impl_from_IXmlReader(iface);
2560 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2562 if (IsEqualGUID(riid, &IID_IUnknown) ||
2563 IsEqualGUID(riid, &IID_IXmlReader))
2565 *ppvObject = iface;
2567 else
2569 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2570 *ppvObject = NULL;
2571 return E_NOINTERFACE;
2574 IXmlReader_AddRef(iface);
2576 return S_OK;
2579 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2581 xmlreader *This = impl_from_IXmlReader(iface);
2582 ULONG ref = InterlockedIncrement(&This->ref);
2583 TRACE("(%p)->(%d)\n", This, ref);
2584 return ref;
2587 static void reader_clear_ns(xmlreader *reader)
2589 struct ns *ns, *ns2;
2591 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2592 reader_free_strvalued(reader, &ns->prefix);
2593 reader_free_strvalued(reader, &ns->uri);
2594 reader_free(reader, ns);
2597 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2598 reader_free_strvalued(reader, &ns->uri);
2599 reader_free(reader, ns);
2603 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2605 xmlreader *This = impl_from_IXmlReader(iface);
2606 LONG ref = InterlockedDecrement(&This->ref);
2608 TRACE("(%p)->(%d)\n", This, ref);
2610 if (ref == 0)
2612 IMalloc *imalloc = This->imalloc;
2613 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2614 if (This->resolver) IXmlResolver_Release(This->resolver);
2615 if (This->mlang) IUnknown_Release(This->mlang);
2616 reader_clear_attrs(This);
2617 reader_clear_ns(This);
2618 reader_clear_elements(This);
2619 reader_free_strvalues(This);
2620 reader_free(This, This);
2621 if (imalloc) IMalloc_Release(imalloc);
2624 return ref;
2627 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2629 xmlreader *This = impl_from_IXmlReader(iface);
2630 IXmlReaderInput *readerinput;
2631 HRESULT hr;
2633 TRACE("(%p)->(%p)\n", This, input);
2635 if (This->input)
2637 readerinput_release_stream(This->input);
2638 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2639 This->input = NULL;
2642 This->line = This->pos = 0;
2643 reader_clear_elements(This);
2644 This->depth = 0;
2645 This->resumestate = XmlReadResumeState_Initial;
2646 memset(This->resume, 0, sizeof(This->resume));
2648 /* just reset current input */
2649 if (!input)
2651 This->state = XmlReadState_Initial;
2652 return S_OK;
2655 /* now try IXmlReaderInput, ISequentialStream, IStream */
2656 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2657 if (hr == S_OK)
2659 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2660 This->input = impl_from_IXmlReaderInput(readerinput);
2661 else
2663 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2664 readerinput, readerinput->lpVtbl);
2665 IUnknown_Release(readerinput);
2666 return E_FAIL;
2671 if (hr != S_OK || !readerinput)
2673 /* create IXmlReaderInput basing on supplied interface */
2674 hr = CreateXmlReaderInputWithEncodingName(input,
2675 This->imalloc, NULL, FALSE, NULL, &readerinput);
2676 if (hr != S_OK) return hr;
2677 This->input = impl_from_IXmlReaderInput(readerinput);
2680 /* set stream for supplied IXmlReaderInput */
2681 hr = readerinput_query_for_stream(This->input);
2682 if (hr == S_OK)
2684 This->state = XmlReadState_Initial;
2685 This->instate = XmlReadInState_Initial;
2688 return hr;
2691 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2693 xmlreader *This = impl_from_IXmlReader(iface);
2695 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2697 if (!value) return E_INVALIDARG;
2699 switch (property)
2701 case XmlReaderProperty_MultiLanguage:
2702 *value = (LONG_PTR)This->mlang;
2703 if (This->mlang)
2704 IUnknown_AddRef(This->mlang);
2705 break;
2706 case XmlReaderProperty_XmlResolver:
2707 *value = (LONG_PTR)This->resolver;
2708 if (This->resolver)
2709 IXmlResolver_AddRef(This->resolver);
2710 break;
2711 case XmlReaderProperty_DtdProcessing:
2712 *value = This->dtdmode;
2713 break;
2714 case XmlReaderProperty_ReadState:
2715 *value = This->state;
2716 break;
2717 default:
2718 FIXME("Unimplemented property (%u)\n", property);
2719 return E_NOTIMPL;
2722 return S_OK;
2725 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2727 xmlreader *This = impl_from_IXmlReader(iface);
2729 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2731 switch (property)
2733 case XmlReaderProperty_MultiLanguage:
2734 if (This->mlang)
2735 IUnknown_Release(This->mlang);
2736 This->mlang = (IUnknown*)value;
2737 if (This->mlang)
2738 IUnknown_AddRef(This->mlang);
2739 if (This->mlang)
2740 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2741 break;
2742 case XmlReaderProperty_XmlResolver:
2743 if (This->resolver)
2744 IXmlResolver_Release(This->resolver);
2745 This->resolver = (IXmlResolver*)value;
2746 if (This->resolver)
2747 IXmlResolver_AddRef(This->resolver);
2748 break;
2749 case XmlReaderProperty_DtdProcessing:
2750 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2751 This->dtdmode = value;
2752 break;
2753 case XmlReaderProperty_MaxElementDepth:
2754 FIXME("Ignoring MaxElementDepth %ld\n", value);
2755 break;
2756 default:
2757 FIXME("Unimplemented property (%u)\n", property);
2758 return E_NOTIMPL;
2761 return S_OK;
2764 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2766 xmlreader *This = impl_from_IXmlReader(iface);
2767 XmlNodeType oldtype = This->nodetype;
2768 HRESULT hr;
2770 TRACE("(%p)->(%p)\n", This, nodetype);
2772 if (This->state == XmlReadState_Closed) return S_FALSE;
2774 hr = reader_parse_nextnode(This);
2775 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2776 This->state = XmlReadState_Interactive;
2777 if (hr == S_OK)
2779 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2780 *nodetype = This->nodetype;
2783 return hr;
2786 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2788 xmlreader *This = impl_from_IXmlReader(iface);
2789 TRACE("(%p)->(%p)\n", This, node_type);
2791 *node_type = reader_get_nodetype(This);
2792 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2795 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2797 if (!reader->attr_count)
2798 return S_FALSE;
2800 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2801 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2802 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2803 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2805 return S_OK;
2808 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2810 xmlreader *This = impl_from_IXmlReader(iface);
2812 TRACE("(%p)\n", This);
2814 return reader_move_to_first_attribute(This);
2817 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2819 xmlreader *This = impl_from_IXmlReader(iface);
2820 const struct list *next;
2822 TRACE("(%p)\n", This);
2824 if (!This->attr_count) return S_FALSE;
2826 if (!This->attr)
2827 return reader_move_to_first_attribute(This);
2829 next = list_next(&This->attrs, &This->attr->entry);
2830 if (next)
2832 This->attr = LIST_ENTRY(next, struct attribute, entry);
2833 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2834 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2835 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2838 return next ? S_OK : S_FALSE;
2841 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2842 LPCWSTR local_name,
2843 LPCWSTR namespaceUri)
2845 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2846 return E_NOTIMPL;
2849 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2851 xmlreader *This = impl_from_IXmlReader(iface);
2853 TRACE("(%p)\n", This);
2855 if (!This->attr_count) return S_FALSE;
2856 This->attr = NULL;
2858 /* FIXME: support other node types with 'attributes' like DTD */
2859 if (This->is_empty_element) {
2860 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2861 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2863 else {
2864 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2865 if (element) {
2866 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2867 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2871 return S_OK;
2874 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2876 xmlreader *This = impl_from_IXmlReader(iface);
2878 TRACE("(%p)->(%p %p)\n", This, name, len);
2879 *name = This->strvalues[StringValue_QualifiedName].str;
2880 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2881 return S_OK;
2884 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2886 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2887 struct ns *ns;
2889 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2890 if (strval_eq(reader, prefix, &ns->prefix))
2891 return ns;
2894 return NULL;
2897 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2899 if (list_empty(&reader->nsdef))
2900 return NULL;
2902 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2905 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2907 xmlreader *This = impl_from_IXmlReader(iface);
2908 const strval *prefix = &This->strvalues[StringValue_Prefix];
2909 XmlNodeType nodetype;
2910 struct ns *ns;
2911 UINT length;
2913 TRACE("(%p %p %p)\n", iface, uri, len);
2915 if (!len)
2916 len = &length;
2918 *uri = NULL;
2919 *len = 0;
2921 switch ((nodetype = reader_get_nodetype(This)))
2923 case XmlNodeType_Attribute:
2925 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2926 '2','0','0','0','/','x','m','l','n','s','/',0};
2927 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2928 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2929 const strval *local = &This->strvalues[StringValue_LocalName];
2931 /* check for reserved prefixes first */
2932 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2933 strval_eq(This, prefix, &strval_xmlns))
2935 *uri = xmlns_uriW;
2936 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2938 else if (strval_eq(This, prefix, &strval_xml)) {
2939 *uri = xml_uriW;
2940 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2943 if (!*uri) {
2944 ns = reader_lookup_ns(This, prefix);
2945 if (ns) {
2946 *uri = ns->uri.str;
2947 *len = ns->uri.len;
2949 else {
2950 *uri = emptyW;
2951 *len = 0;
2955 break;
2956 case XmlNodeType_Element:
2957 case XmlNodeType_EndElement:
2959 ns = reader_lookup_ns(This, prefix);
2961 /* pick top default ns if any */
2962 if (!ns)
2963 ns = reader_lookup_nsdef(This);
2965 if (ns) {
2966 *uri = ns->uri.str;
2967 *len = ns->uri.len;
2969 else {
2970 *uri = emptyW;
2971 *len = 0;
2974 break;
2975 default:
2976 FIXME("Unhandled node type %d\n", nodetype);
2977 return E_NOTIMPL;
2980 return S_OK;
2983 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2985 xmlreader *This = impl_from_IXmlReader(iface);
2987 TRACE("(%p)->(%p %p)\n", This, name, len);
2988 *name = This->strvalues[StringValue_LocalName].str;
2989 if (len) *len = This->strvalues[StringValue_LocalName].len;
2990 return S_OK;
2993 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2995 xmlreader *This = impl_from_IXmlReader(iface);
2997 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2998 *prefix = This->strvalues[StringValue_Prefix].str;
2999 if (len) *len = This->strvalues[StringValue_Prefix].len;
3000 return S_OK;
3003 static BOOL is_namespace_definition(xmlreader *reader)
3005 const strval *local = &reader->strvalues[StringValue_LocalName];
3006 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3008 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3009 return FALSE;
3011 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3012 strval_eq(reader, prefix, &strval_xmlns));
3015 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3017 xmlreader *reader = impl_from_IXmlReader(iface);
3018 strval *val = &reader->strvalues[StringValue_Value];
3020 TRACE("(%p)->(%p %p)\n", reader, value, len);
3022 *value = NULL;
3024 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3026 XmlNodeType type;
3027 HRESULT hr;
3029 hr = IXmlReader_Read(iface, &type);
3030 if (FAILED(hr)) return hr;
3032 /* return if still pending, partially read values are not reported */
3033 if (is_reader_pending(reader)) return E_PENDING;
3036 if (!val->str)
3038 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3039 if (!ptr) return E_OUTOFMEMORY;
3040 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3041 ptr[val->len] = 0;
3042 val->str = ptr;
3045 /* For namespace definition attributes return values from namespace list */
3046 if (is_namespace_definition(reader)) {
3047 const strval *local = &reader->strvalues[StringValue_LocalName];
3048 struct ns *ns;
3050 ns = reader_lookup_ns(reader, local);
3051 if (!ns)
3052 ns = reader_lookup_nsdef(reader);
3054 val = &ns->uri;
3057 *value = val->str;
3058 if (len) *len = val->len;
3059 return S_OK;
3062 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3064 xmlreader *reader = impl_from_IXmlReader(iface);
3065 strval *val = &reader->strvalues[StringValue_Value];
3066 UINT len;
3068 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3070 /* Value is already allocated, chunked reads are not possible. */
3071 if (val->str) return S_FALSE;
3073 if (val->len)
3075 len = min(chunk_size, val->len);
3076 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3077 val->start += len;
3078 val->len -= len;
3079 if (read) *read = len;
3082 return S_OK;
3085 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3086 LPCWSTR *baseUri,
3087 UINT *baseUri_length)
3089 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3090 return E_NOTIMPL;
3093 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3095 FIXME("(%p): stub\n", iface);
3096 return FALSE;
3099 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3101 xmlreader *This = impl_from_IXmlReader(iface);
3102 TRACE("(%p)\n", This);
3103 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3104 when current node is start tag of an element */
3105 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3108 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3110 xmlreader *This = impl_from_IXmlReader(iface);
3112 TRACE("(%p %p)\n", This, lineNumber);
3114 if (!lineNumber) return E_INVALIDARG;
3116 *lineNumber = This->line;
3118 return S_OK;
3121 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3123 xmlreader *This = impl_from_IXmlReader(iface);
3125 TRACE("(%p %p)\n", This, linePosition);
3127 if (!linePosition) return E_INVALIDARG;
3129 *linePosition = This->pos;
3131 return S_OK;
3134 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3136 xmlreader *This = impl_from_IXmlReader(iface);
3138 TRACE("(%p)->(%p)\n", This, count);
3140 if (!count) return E_INVALIDARG;
3142 *count = This->attr_count;
3143 return S_OK;
3146 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3148 xmlreader *This = impl_from_IXmlReader(iface);
3149 TRACE("(%p)->(%p)\n", This, depth);
3150 *depth = This->depth;
3151 return S_OK;
3154 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3156 FIXME("(%p): stub\n", iface);
3157 return FALSE;
3160 static const struct IXmlReaderVtbl xmlreader_vtbl =
3162 xmlreader_QueryInterface,
3163 xmlreader_AddRef,
3164 xmlreader_Release,
3165 xmlreader_SetInput,
3166 xmlreader_GetProperty,
3167 xmlreader_SetProperty,
3168 xmlreader_Read,
3169 xmlreader_GetNodeType,
3170 xmlreader_MoveToFirstAttribute,
3171 xmlreader_MoveToNextAttribute,
3172 xmlreader_MoveToAttributeByName,
3173 xmlreader_MoveToElement,
3174 xmlreader_GetQualifiedName,
3175 xmlreader_GetNamespaceUri,
3176 xmlreader_GetLocalName,
3177 xmlreader_GetPrefix,
3178 xmlreader_GetValue,
3179 xmlreader_ReadValueChunk,
3180 xmlreader_GetBaseUri,
3181 xmlreader_IsDefault,
3182 xmlreader_IsEmptyElement,
3183 xmlreader_GetLineNumber,
3184 xmlreader_GetLinePosition,
3185 xmlreader_GetAttributeCount,
3186 xmlreader_GetDepth,
3187 xmlreader_IsEOF
3190 /** IXmlReaderInput **/
3191 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3193 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3195 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3197 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3198 IsEqualGUID(riid, &IID_IUnknown))
3200 *ppvObject = iface;
3202 else
3204 WARN("interface %s not implemented\n", debugstr_guid(riid));
3205 *ppvObject = NULL;
3206 return E_NOINTERFACE;
3209 IUnknown_AddRef(iface);
3211 return S_OK;
3214 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3216 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3217 ULONG ref = InterlockedIncrement(&This->ref);
3218 TRACE("(%p)->(%d)\n", This, ref);
3219 return ref;
3222 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3224 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3225 LONG ref = InterlockedDecrement(&This->ref);
3227 TRACE("(%p)->(%d)\n", This, ref);
3229 if (ref == 0)
3231 IMalloc *imalloc = This->imalloc;
3232 if (This->input) IUnknown_Release(This->input);
3233 if (This->stream) ISequentialStream_Release(This->stream);
3234 if (This->buffer) free_input_buffer(This->buffer);
3235 readerinput_free(This, This->baseuri);
3236 readerinput_free(This, This);
3237 if (imalloc) IMalloc_Release(imalloc);
3240 return ref;
3243 static const struct IUnknownVtbl xmlreaderinputvtbl =
3245 xmlreaderinput_QueryInterface,
3246 xmlreaderinput_AddRef,
3247 xmlreaderinput_Release
3250 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3252 xmlreader *reader;
3253 int i;
3255 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3257 if (!IsEqualGUID(riid, &IID_IXmlReader))
3259 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3260 return E_FAIL;
3263 if (imalloc)
3264 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3265 else
3266 reader = heap_alloc(sizeof(*reader));
3267 if(!reader) return E_OUTOFMEMORY;
3269 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3270 reader->ref = 1;
3271 reader->input = NULL;
3272 reader->state = XmlReadState_Closed;
3273 reader->instate = XmlReadInState_Initial;
3274 reader->resumestate = XmlReadResumeState_Initial;
3275 reader->dtdmode = DtdProcessing_Prohibit;
3276 reader->resolver = NULL;
3277 reader->mlang = NULL;
3278 reader->line = reader->pos = 0;
3279 reader->imalloc = imalloc;
3280 if (imalloc) IMalloc_AddRef(imalloc);
3281 reader->nodetype = XmlNodeType_None;
3282 list_init(&reader->attrs);
3283 reader->attr_count = 0;
3284 reader->attr = NULL;
3285 list_init(&reader->nsdef);
3286 list_init(&reader->ns);
3287 list_init(&reader->elements);
3288 reader->depth = 0;
3289 reader->max_depth = 256;
3290 reader->is_empty_element = FALSE;
3291 memset(reader->resume, 0, sizeof(reader->resume));
3293 for (i = 0; i < StringValue_Last; i++)
3294 reader->strvalues[i] = strval_empty;
3296 *obj = &reader->IXmlReader_iface;
3298 TRACE("returning iface %p\n", *obj);
3300 return S_OK;
3303 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3304 IMalloc *imalloc,
3305 LPCWSTR encoding,
3306 BOOL hint,
3307 LPCWSTR base_uri,
3308 IXmlReaderInput **ppInput)
3310 xmlreaderinput *readerinput;
3311 HRESULT hr;
3313 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3314 hint, wine_dbgstr_w(base_uri), ppInput);
3316 if (!stream || !ppInput) return E_INVALIDARG;
3318 if (imalloc)
3319 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3320 else
3321 readerinput = heap_alloc(sizeof(*readerinput));
3322 if(!readerinput) return E_OUTOFMEMORY;
3324 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3325 readerinput->ref = 1;
3326 readerinput->imalloc = imalloc;
3327 readerinput->stream = NULL;
3328 if (imalloc) IMalloc_AddRef(imalloc);
3329 readerinput->encoding = parse_encoding_name(encoding, -1);
3330 readerinput->hint = hint;
3331 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3332 readerinput->pending = 0;
3334 hr = alloc_input_buffer(readerinput);
3335 if (hr != S_OK)
3337 readerinput_free(readerinput, readerinput->baseuri);
3338 readerinput_free(readerinput, readerinput);
3339 if (imalloc) IMalloc_Release(imalloc);
3340 return hr;
3342 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3344 *ppInput = &readerinput->IXmlReaderInput_iface;
3346 TRACE("returning iface %p\n", *ppInput);
3348 return S_OK;