xmllite/reader: Return static empty string as xml declaration node value.
[wine.git] / dlls / xmllite / reader.c
blob1805758a31c3c8a9f7798f1f61fd77820b8ff703
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parser state, stored in xmlreader.instate. reader_pop_element()
   switches it to XmlReadInState_MiscEnd once the element stack becomes empty. */
41 typedef enum
43 XmlReadInState_Initial,
44 XmlReadInState_XmlDecl,
45 XmlReadInState_Misc_DTD,
46 XmlReadInState_DTD,
47 XmlReadInState_DTD_Misc,
48 XmlReadInState_Element,
49 XmlReadInState_Content,
50 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
51 XmlReadInState_Eof
52 } XmlReaderInternalState;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
/* Values of xmlreader.resumestate; each names the construct that was being
   parsed when input ran out. */
56 typedef enum
58 XmlReadResumeState_Initial,
59 XmlReadResumeState_PITarget,
60 XmlReadResumeState_PIBody,
61 XmlReadResumeState_CDATA,
62 XmlReadResumeState_Comment,
63 XmlReadResumeState_STag,
64 XmlReadResumeState_CharData,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState;
68 /* saved pointer index to resume from particular input position */
/* Index into the xmlreader.resume[] offset array; XmlReadResume_Last is not a
   slot, it is the array size. */
69 typedef enum
71 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local, /* local for QName */
73 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
74 XmlReadResume_Last
75 } XmlReaderResume;
/* Index into xmlreader.strvalues[], the cached strings of the current node.
   StringValue_Last is the number of slots. StringValue_Value is special:
   reader_set_strvalue() defers allocating it. */
77 typedef enum
79 StringValue_LocalName,
80 StringValue_Prefix,
81 StringValue_QualifiedName,
82 StringValue_Value,
83 StringValue_Last
84 } XmlReaderStringValue;
/* Encoding names referenced from xml_encoding_map. */
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
/* Null-terminated markup tokens; quoteW and dblquoteW are passed to reader_cmp()
   when parsing quoted attribute values. */
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static BOOL is_namestartchar(WCHAR ch);
98 static const char *debugstr_nodetype(XmlNodeType nodetype)
100 static const char * const type_names[] =
102 "None",
103 "Element",
104 "Attribute",
105 "Text",
106 "CDATA",
109 "ProcessingInstruction",
110 "Comment",
112 "DocumentType",
115 "Whitespace",
117 "EndElement",
119 "XmlDeclaration"
122 if (nodetype > _XmlNodeType_Last)
123 return wine_dbg_sprintf("unknown type=%d", nodetype);
125 return type_names[nodetype];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop)
130 static const char * const prop_names[] =
132 "MultiLanguage",
133 "ConformanceLevel",
134 "RandomAccess",
135 "XmlResolver",
136 "DtdProcessing",
137 "ReadState",
138 "MaxElementDepth",
139 "MaxEntityExpansion"
142 if (prop > _XmlReaderProperty_Last)
143 return wine_dbg_sprintf("unknown property=%d", prop);
145 return prop_names[prop];
/* One row of the encoding table: encoding name, encoding id and code page. */
148 struct xml_encoding_data
150 const WCHAR *name;
151 xml_encoding enc;
152 UINT cp;
/* Encoding table. get_encoding_name() and get_code_page() index it by the
   xml_encoding value, and parse_encoding_name() runs a binary search on the
   name, so rows must stay sorted by name. UTF-16 uses ~0 as its code page. */
155 static const struct xml_encoding_data xml_encoding_map[] = {
156 { utf16W, XmlEncoding_UTF16, ~0 },
157 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
160 const WCHAR *get_encoding_name(xml_encoding encoding)
162 return xml_encoding_map[encoding].name;
165 xml_encoding get_encoding_from_codepage(UINT codepage)
167 int i;
168 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
170 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
172 return XmlEncoding_Unknown;
/* Growable byte buffer. 'allocated' and 'written' are byte counts. 'cur' is a
   byte offset for the raw buffer (it is advanced by BOM sizes), but a WCHAR
   index for the UTF-16 buffer (see reader_get_ptr2() and reader_shrink()). */
175 typedef struct
177 char *data;
178 UINT cur;
179 unsigned int allocated;
180 unsigned int written;
181 } encoded_buffer;
183 typedef struct input_buffer input_buffer;
/* Reader input object. Allocation helpers use 'imalloc'; 'buffer' holds the
   raw and UTF-16 buffers; 'pending' is set by readerinput_growraw() when the
   stream read returned E_PENDING. */
185 typedef struct
187 IXmlReaderInput IXmlReaderInput_iface;
188 LONG ref;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
190 IUnknown *input;
191 IMalloc *imalloc;
192 xml_encoding encoding;
193 BOOL hint;
194 WCHAR *baseuri;
195 /* stream reference set after SetInput() call from reader,
196 stored as sequential stream, cause currently
197 optimizations possible with IStream aren't implemented */
198 ISequentialStream *stream;
199 input_buffer *buffer;
200 unsigned int pending : 1;
201 } xmlreaderinput;
203 static const struct IUnknownVtbl xmlreaderinputvtbl;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
/* When 'str' is NULL the text lives in the UTF-16 input buffer at index
   'start' (see reader_get_strptr()); otherwise 'str' is used directly. */
213 typedef struct
215 WCHAR *str; /* allocated null-terminated string */
216 UINT len; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start; /* input position where value starts */
218 } strval;
/* Shared constant values. strval_empty.str is compared by address in
   reader_strvaldup(), reader_free_strvalued() and reader_set_strvalue() to
   recognise the static empty string, which is never copied or freed. */
220 static WCHAR emptyW[] = {0};
221 static WCHAR xmlW[] = {'x','m','l',0};
222 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
223 static const strval strval_empty = { emptyW };
224 static const strval strval_xml = { xmlW, 3 };
225 static const strval strval_xmlns = { xmlnsW, 5 };
/* Line/column pair. The reader's current position is advanced by
   reader_skipn() and reader_skipspaces(). */
227 struct reader_position
229 UINT line_number;
230 UINT line_position;
/* Attribute of the current node, linked into xmlreader.attrs by
   reader_add_attr(). The strvals are copied by value, not duplicated. */
233 struct attribute
235 struct list entry;
236 strval prefix;
237 strval localname;
238 strval qname;
239 strval value;
240 struct reader_position position;
/* Open element, linked at the head of xmlreader.elements by
   reader_push_element(); the name strings are private copies. */
243 struct element
245 struct list entry;
246 strval prefix;
247 strval localname;
248 strval qname;
249 struct reader_position position;
/* Namespace binding, linked into xmlreader.ns or xmlreader.nsdef. 'element' is
   NULL until reader_mark_ns_nodes() ties the binding to an element. */
252 struct ns
254 struct list entry;
255 strval prefix;
256 strval uri;
257 struct element *element;
/* Reader object. 'ns' and 'nsdef' are the two lists reader_push_ns() selects
   between with its 'def' flag; 'max_depth' of zero disables the depth limit
   (see reader_inc_depth()). */
260 typedef struct
262 IXmlReader IXmlReader_iface;
263 LONG ref;
264 xmlreaderinput *input;
265 IMalloc *imalloc;
266 XmlReadState state;
267 XmlReaderInternalState instate;
268 XmlReaderResumeState resumestate;
269 XmlNodeType nodetype;
270 DtdProcessing dtdmode;
271 IXmlResolver *resolver;
272 IUnknown *mlang;
273 struct reader_position position;
274 struct list attrs; /* attributes list for current node */
275 struct attribute *attr; /* current attribute */
276 UINT attr_count;
277 struct list nsdef;
278 struct list ns;
279 struct list elements;
280 strval strvalues[StringValue_Last];
281 UINT depth;
282 UINT max_depth;
283 BOOL is_empty_element;
284 struct element empty_element; /* used for empty elements without end tag <a />,
285 and to keep <?xml reader position */
286 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
287 } xmlreader;
/* Pair of buffers owned by an input: 'encoded' receives raw stream bytes,
   'utf16' receives the converted text. 'code_page' is ~0 until an encoding is
   selected, and stays ~0 for UTF-16. */
289 struct input_buffer
291 encoded_buffer utf16;
292 encoded_buffer encoded;
293 UINT code_page;
294 xmlreaderinput *input;
297 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
299 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
302 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
304 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
307 /* reader memory allocation functions */
308 static inline void *reader_alloc(xmlreader *reader, size_t len)
310 return m_alloc(reader->imalloc, len);
313 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
315 void *ret = reader_alloc(reader, len);
316 if (ret)
317 memset(ret, 0, len);
318 return ret;
321 static inline void reader_free(xmlreader *reader, void *mem)
323 m_free(reader->imalloc, mem);
326 /* Just return pointer from offset, no attempt to read more. */
327 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
329 encoded_buffer *buffer = &reader->input->buffer->utf16;
330 return (WCHAR*)buffer->data + offset;
333 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
335 return v->str ? v->str : reader_get_ptr2(reader, v->start);
338 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
340 *dest = *src;
342 if (src->str != strval_empty.str)
344 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
345 if (!dest->str) return E_OUTOFMEMORY;
346 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
347 dest->str[dest->len] = 0;
348 dest->start = 0;
351 return S_OK;
354 /* reader input memory allocation functions */
355 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
357 return m_alloc(input->imalloc, len);
360 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
362 return m_realloc(input->imalloc, mem, len);
365 static inline void readerinput_free(xmlreaderinput *input, void *mem)
367 m_free(input->imalloc, mem);
370 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
372 LPWSTR ret = NULL;
374 if(str) {
375 DWORD size;
377 size = (strlenW(str)+1)*sizeof(WCHAR);
378 ret = readerinput_alloc(input, size);
379 if (ret) memcpy(ret, str, size);
382 return ret;
385 static void reader_clear_attrs(xmlreader *reader)
387 struct attribute *attr, *attr2;
388 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
390 reader_free(reader, attr);
392 list_init(&reader->attrs);
393 reader->attr_count = 0;
394 reader->attr = NULL;
397 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
398 while we are on a node with attributes */
399 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
400 strval *value, const struct reader_position *position)
402 struct attribute *attr;
404 attr = reader_alloc(reader, sizeof(*attr));
405 if (!attr) return E_OUTOFMEMORY;
407 if (prefix)
408 attr->prefix = *prefix;
409 else
410 memset(&attr->prefix, 0, sizeof(attr->prefix));
411 attr->localname = *localname;
412 attr->qname = qname ? *qname : *localname;
413 attr->value = *value;
414 attr->position = *position;
415 list_add_tail(&reader->attrs, &attr->entry);
416 reader->attr_count++;
418 return S_OK;
421 /* This one frees stored string value if needed */
422 static void reader_free_strvalued(xmlreader *reader, strval *v)
424 if (v->str != strval_empty.str)
426 reader_free(reader, v->str);
427 *v = strval_empty;
431 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
433 v->start = start;
434 v->len = len;
435 v->str = NULL;
438 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
440 return debugstr_wn(reader_get_strptr(reader, v), v->len);
443 /* used to initialize from constant string */
444 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
446 v->start = 0;
447 v->len = len;
448 v->str = str;
451 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
453 reader_free_strvalued(reader, &reader->strvalues[type]);
456 static void reader_free_strvalues(xmlreader *reader)
458 int type;
459 for (type = 0; type < StringValue_Last; type++)
460 reader_free_strvalue(reader, type);
463 /* This helper should only be used to test if strings are the same,
464 it doesn't try to sort. */
465 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
467 if (str1->len != str2->len) return 0;
468 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
471 static void reader_clear_elements(xmlreader *reader)
473 struct element *elem, *elem2;
474 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
476 reader_free_strvalued(reader, &elem->prefix);
477 reader_free_strvalued(reader, &elem->localname);
478 reader_free_strvalued(reader, &elem->qname);
479 reader_free(reader, elem);
481 list_init(&reader->elements);
482 reader->is_empty_element = FALSE;
485 static HRESULT reader_inc_depth(xmlreader *reader)
487 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
490 static void reader_dec_depth(xmlreader *reader)
492 if (reader->depth)
493 reader->depth--;
496 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
498 struct ns *ns;
499 HRESULT hr;
501 ns = reader_alloc(reader, sizeof(*ns));
502 if (!ns) return E_OUTOFMEMORY;
504 if (def)
505 memset(&ns->prefix, 0, sizeof(ns->prefix));
506 else {
507 hr = reader_strvaldup(reader, prefix, &ns->prefix);
508 if (FAILED(hr)) {
509 reader_free(reader, ns);
510 return hr;
514 hr = reader_strvaldup(reader, uri, &ns->uri);
515 if (FAILED(hr)) {
516 reader_free_strvalued(reader, &ns->prefix);
517 reader_free(reader, ns);
518 return hr;
521 ns->element = NULL;
522 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
523 return hr;
526 static void reader_free_element(xmlreader *reader, struct element *element)
528 reader_free_strvalued(reader, &element->prefix);
529 reader_free_strvalued(reader, &element->localname);
530 reader_free_strvalued(reader, &element->qname);
531 reader_free(reader, element);
534 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
536 struct ns *ns;
538 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
539 if (ns->element)
540 break;
541 ns->element = element;
544 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
545 if (ns->element)
546 break;
547 ns->element = element;
551 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
552 strval *qname, const struct reader_position *position)
554 struct element *element;
555 HRESULT hr;
557 element = reader_alloc_zero(reader, sizeof(*element));
558 if (!element)
559 return E_OUTOFMEMORY;
561 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
562 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
563 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
565 list_add_head(&reader->elements, &element->entry);
566 reader_mark_ns_nodes(reader, element);
567 reader->is_empty_element = FALSE;
568 element->position = *position;
570 else
571 reader_free_element(reader, element);
573 return hr;
576 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
578 struct ns *ns, *ns2;
580 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
581 if (ns->element != element)
582 break;
584 list_remove(&ns->entry);
585 reader_free_strvalued(reader, &ns->prefix);
586 reader_free_strvalued(reader, &ns->uri);
587 reader_free(reader, ns);
590 if (!list_empty(&reader->nsdef)) {
591 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
592 if (ns->element == element) {
593 list_remove(&ns->entry);
594 reader_free_strvalued(reader, &ns->prefix);
595 reader_free_strvalued(reader, &ns->uri);
596 reader_free(reader, ns);
601 static void reader_pop_element(xmlreader *reader)
603 struct element *element;
605 if (list_empty(&reader->elements))
606 return;
608 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
609 list_remove(&element->entry);
611 reader_pop_ns_nodes(reader, element);
612 reader_free_element(reader, element);
614 /* It was a root element, the rest is expected as Misc */
615 if (list_empty(&reader->elements))
616 reader->instate = XmlReadInState_MiscEnd;
619 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
620 means node value is to be determined. */
621 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
623 strval *v = &reader->strvalues[type];
625 reader_free_strvalue(reader, type);
626 if (!value)
628 v->str = NULL;
629 v->start = 0;
630 v->len = 0;
631 return;
634 if (value->str == strval_empty.str)
635 *v = *value;
636 else
638 if (type == StringValue_Value)
640 /* defer allocation for value string */
641 v->str = NULL;
642 v->start = value->start;
643 v->len = value->len;
645 else
647 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
648 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
649 v->str[value->len] = 0;
650 v->len = value->len;
655 static inline int is_reader_pending(xmlreader *reader)
657 return reader->input->pending;
660 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
662 const int initial_len = 0x2000;
663 buffer->data = readerinput_alloc(input, initial_len);
664 if (!buffer->data) return E_OUTOFMEMORY;
666 memset(buffer->data, 0, 4);
667 buffer->cur = 0;
668 buffer->allocated = initial_len;
669 buffer->written = 0;
671 return S_OK;
674 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
676 readerinput_free(input, buffer->data);
679 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
681 if (encoding == XmlEncoding_Unknown)
683 FIXME("unsupported encoding %d\n", encoding);
684 return E_NOTIMPL;
687 *cp = xml_encoding_map[encoding].cp;
689 return S_OK;
692 xml_encoding parse_encoding_name(const WCHAR *name, int len)
694 int min, max, n, c;
696 if (!name) return XmlEncoding_Unknown;
698 min = 0;
699 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
701 while (min <= max)
703 n = (min+max)/2;
705 if (len != -1)
706 c = strncmpiW(xml_encoding_map[n].name, name, len);
707 else
708 c = strcmpiW(xml_encoding_map[n].name, name);
709 if (!c)
710 return xml_encoding_map[n].enc;
712 if (c > 0)
713 max = n-1;
714 else
715 min = n+1;
718 return XmlEncoding_Unknown;
721 static HRESULT alloc_input_buffer(xmlreaderinput *input)
723 input_buffer *buffer;
724 HRESULT hr;
726 input->buffer = NULL;
728 buffer = readerinput_alloc(input, sizeof(*buffer));
729 if (!buffer) return E_OUTOFMEMORY;
731 buffer->input = input;
732 buffer->code_page = ~0; /* code page is unknown at this point */
733 hr = init_encoded_buffer(input, &buffer->utf16);
734 if (hr != S_OK) {
735 readerinput_free(input, buffer);
736 return hr;
739 hr = init_encoded_buffer(input, &buffer->encoded);
740 if (hr != S_OK) {
741 free_encoded_buffer(input, &buffer->utf16);
742 readerinput_free(input, buffer);
743 return hr;
746 input->buffer = buffer;
747 return S_OK;
750 static void free_input_buffer(input_buffer *buffer)
752 free_encoded_buffer(buffer->input, &buffer->encoded);
753 free_encoded_buffer(buffer->input, &buffer->utf16);
754 readerinput_free(buffer->input, buffer);
757 static void readerinput_release_stream(xmlreaderinput *readerinput)
759 if (readerinput->stream) {
760 ISequentialStream_Release(readerinput->stream);
761 readerinput->stream = NULL;
765 /* Queries already stored interface for IStream/ISequentialStream.
766 Interface supplied on creation will be overwritten */
767 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
769 HRESULT hr;
771 readerinput_release_stream(readerinput);
772 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
773 if (hr != S_OK)
774 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
776 return hr;
779 /* reads a chunk to raw buffer */
780 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
782 encoded_buffer *buffer = &readerinput->buffer->encoded;
783 /* to make sure aligned length won't exceed allocated length */
784 ULONG len = buffer->allocated - buffer->written - 4;
785 ULONG read;
786 HRESULT hr;
788 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
789 variable width encodings like UTF-8 */
790 len = (len + 3) & ~3;
791 /* try to use allocated space or grow */
792 if (buffer->allocated - buffer->written < len)
794 buffer->allocated *= 2;
795 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
796 len = buffer->allocated - buffer->written;
799 read = 0;
800 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
801 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
802 readerinput->pending = hr == E_PENDING;
803 if (FAILED(hr)) return hr;
804 buffer->written += read;
806 return hr;
809 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
810 static void readerinput_grow(xmlreaderinput *readerinput, int length)
812 encoded_buffer *buffer = &readerinput->buffer->utf16;
814 length *= sizeof(WCHAR);
815 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
816 if (buffer->allocated < buffer->written + length + 4)
818 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
819 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
820 buffer->allocated = grown_size;
824 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
826 static const char startA[] = {'<','?'};
827 static const char commentA[] = {'<','!'};
828 encoded_buffer *buffer = &readerinput->buffer->encoded;
829 unsigned char *ptr = (unsigned char*)buffer->data;
831 return !memcmp(buffer->data, startA, sizeof(startA)) ||
832 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
833 /* test start byte */
834 (ptr[0] == '<' &&
836 (ptr[1] && (ptr[1] <= 0x7f)) ||
837 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
838 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
839 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
843 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
845 encoded_buffer *buffer = &readerinput->buffer->encoded;
846 static const char utf8bom[] = {0xef,0xbb,0xbf};
847 static const char utf16lebom[] = {0xff,0xfe};
848 WCHAR *ptrW;
850 *enc = XmlEncoding_Unknown;
852 if (buffer->written <= 3)
854 HRESULT hr = readerinput_growraw(readerinput);
855 if (FAILED(hr)) return hr;
856 if (buffer->written <= 3) return MX_E_INPUTEND;
859 ptrW = (WCHAR *)buffer->data;
860 /* try start symbols if we have enough data to do that, input buffer should contain
861 first chunk already */
862 if (readerinput_is_utf8(readerinput))
863 *enc = XmlEncoding_UTF8;
864 else if (*ptrW == '<')
866 ptrW++;
867 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
868 *enc = XmlEncoding_UTF16;
870 /* try with BOM now */
871 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
873 buffer->cur += sizeof(utf8bom);
874 *enc = XmlEncoding_UTF8;
876 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
878 buffer->cur += sizeof(utf16lebom);
879 *enc = XmlEncoding_UTF16;
882 return S_OK;
885 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
887 encoded_buffer *buffer = &readerinput->buffer->encoded;
888 int len = buffer->written;
890 /* complete single byte char */
891 if (!(buffer->data[len-1] & 0x80)) return len;
893 /* find start byte of multibyte char */
894 while (--len && !(buffer->data[len] & 0xc0))
897 return len;
900 /* Returns byte length of complete char sequence for buffer code page,
901 it's relative to current buffer position which is currently used for BOM handling
902 only. */
903 static int readerinput_get_convlen(xmlreaderinput *readerinput)
905 encoded_buffer *buffer = &readerinput->buffer->encoded;
906 int len;
908 if (readerinput->buffer->code_page == CP_UTF8)
909 len = readerinput_get_utf8_convlen(readerinput);
910 else
911 len = buffer->written;
913 TRACE("%d\n", len - buffer->cur);
914 return len - buffer->cur;
917 /* It's possible that raw buffer has some leftovers from last conversion - some char
918 sequence that doesn't represent a full code point. Length argument should be calculated with
919 readerinput_get_convlen(), if it's -1 it will be calculated here. */
920 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
922 encoded_buffer *buffer = &readerinput->buffer->encoded;
924 if (len == -1)
925 len = readerinput_get_convlen(readerinput);
927 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
928 /* everything below cur is lost too */
929 buffer->written -= len + buffer->cur;
930 /* after this point we don't need cur offset really,
931 it's used only to mark where actual data begins when first chunk is read */
932 buffer->cur = 0;
935 /* note that raw buffer content is kept */
936 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
938 encoded_buffer *src = &readerinput->buffer->encoded;
939 encoded_buffer *dest = &readerinput->buffer->utf16;
940 int len, dest_len;
941 HRESULT hr;
942 WCHAR *ptr;
943 UINT cp;
945 hr = get_code_page(enc, &cp);
946 if (FAILED(hr)) return;
948 readerinput->buffer->code_page = cp;
949 len = readerinput_get_convlen(readerinput);
951 TRACE("switching to cp %d\n", cp);
953 /* just copy in this case */
954 if (enc == XmlEncoding_UTF16)
956 readerinput_grow(readerinput, len);
957 memcpy(dest->data, src->data + src->cur, len);
958 dest->written += len*sizeof(WCHAR);
959 return;
962 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
963 readerinput_grow(readerinput, dest_len);
964 ptr = (WCHAR*)dest->data;
965 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
966 ptr[dest_len] = 0;
967 dest->written += dest_len*sizeof(WCHAR);
970 /* shrinks parsed data a buffer begins with */
971 static void reader_shrink(xmlreader *reader)
973 encoded_buffer *buffer = &reader->input->buffer->utf16;
975 /* avoid to move too often using threshold shrink length */
976 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
978 buffer->written -= buffer->cur*sizeof(WCHAR);
979 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
980 buffer->cur = 0;
981 *(WCHAR*)&buffer->data[buffer->written] = 0;
985 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
986 It won't attempt to shrink but will grow destination buffer if needed */
987 static HRESULT reader_more(xmlreader *reader)
989 xmlreaderinput *readerinput = reader->input;
990 encoded_buffer *src = &readerinput->buffer->encoded;
991 encoded_buffer *dest = &readerinput->buffer->utf16;
992 UINT cp = readerinput->buffer->code_page;
993 int len, dest_len;
994 HRESULT hr;
995 WCHAR *ptr;
997 /* get some raw data from stream first */
998 hr = readerinput_growraw(readerinput);
999 len = readerinput_get_convlen(readerinput);
1001 /* just copy for UTF-16 case */
1002 if (cp == ~0)
1004 readerinput_grow(readerinput, len);
1005 memcpy(dest->data + dest->written, src->data + src->cur, len);
1006 dest->written += len*sizeof(WCHAR);
1007 return hr;
1010 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1011 readerinput_grow(readerinput, dest_len);
1012 ptr = (WCHAR*)(dest->data + dest->written);
1013 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1014 ptr[dest_len] = 0;
1015 dest->written += dest_len*sizeof(WCHAR);
1016 /* get rid of processed data */
1017 readerinput_shrinkraw(readerinput, len);
1019 return hr;
1022 static inline UINT reader_get_cur(xmlreader *reader)
1024 return reader->input->buffer->utf16.cur;
1027 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1029 encoded_buffer *buffer = &reader->input->buffer->utf16;
1030 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1031 if (!*ptr) reader_more(reader);
1032 return (WCHAR*)buffer->data + buffer->cur;
1035 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1037 int i=0;
1038 const WCHAR *ptr = reader_get_ptr(reader);
1039 while (str[i])
1041 if (!ptr[i])
1043 reader_more(reader);
1044 ptr = reader_get_ptr(reader);
1046 if (str[i] != ptr[i])
1047 return ptr[i] - str[i];
1048 i++;
1050 return 0;
1053 /* moves cursor n WCHARs forward */
1054 static void reader_skipn(xmlreader *reader, int n)
1056 encoded_buffer *buffer = &reader->input->buffer->utf16;
1057 const WCHAR *ptr = reader_get_ptr(reader);
1059 while (*ptr++ && n--)
1061 buffer->cur++;
1062 reader->position.line_position++;
1066 static inline BOOL is_wchar_space(WCHAR ch)
1068 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1071 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1072 static int reader_skipspaces(xmlreader *reader)
1074 encoded_buffer *buffer = &reader->input->buffer->utf16;
1075 const WCHAR *ptr = reader_get_ptr(reader);
1076 UINT start = reader_get_cur(reader);
1078 while (is_wchar_space(*ptr))
1080 if (*ptr == '\r')
1081 reader->position.line_position = 0;
1082 else if (*ptr == '\n')
1084 reader->position.line_number++;
1085 reader->position.line_position = 0;
1087 else
1088 reader->position.line_position++;
1090 buffer->cur++;
1091 ptr = reader_get_ptr(reader);
1094 return reader_get_cur(reader) - start;
1097 /* [26] VersionNum ::= '1.' [0-9]+ */
1098 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1100 static const WCHAR onedotW[] = {'1','.',0};
1101 WCHAR *ptr, *ptr2;
1102 UINT start;
1104 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1106 start = reader_get_cur(reader);
1107 /* skip "1." */
1108 reader_skipn(reader, 2);
1110 ptr2 = ptr = reader_get_ptr(reader);
1111 while (*ptr >= '0' && *ptr <= '9')
1113 reader_skipn(reader, 1);
1114 ptr = reader_get_ptr(reader);
1117 if (ptr2 == ptr) return WC_E_DIGIT;
1118 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1119 TRACE("version=%s\n", debug_strval(reader, val));
1120 return S_OK;
1123 /* [25] Eq ::= S? '=' S? */
1124 static HRESULT reader_parse_eq(xmlreader *reader)
1126 static const WCHAR eqW[] = {'=',0};
1127 reader_skipspaces(reader);
1128 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1129 /* skip '=' */
1130 reader_skipn(reader, 1);
1131 reader_skipspaces(reader);
1132 return S_OK;
1135 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1136 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1138 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1139 struct reader_position position;
1140 strval val, name;
1141 HRESULT hr;
1143 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1145 position = reader->position;
1146 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1147 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1148 /* skip 'version' */
1149 reader_skipn(reader, 7);
1151 hr = reader_parse_eq(reader);
1152 if (FAILED(hr)) return hr;
1154 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1155 return WC_E_QUOTE;
1156 /* skip "'"|'"' */
1157 reader_skipn(reader, 1);
1159 hr = reader_parse_versionnum(reader, &val);
1160 if (FAILED(hr)) return hr;
1162 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1163 return WC_E_QUOTE;
1165 /* skip "'"|'"' */
1166 reader_skipn(reader, 1);
1168 return reader_add_attr(reader, NULL, &name, NULL, &val, &position);
1171 /* ([A-Za-z0-9._] | '-') */
1172 static inline BOOL is_wchar_encname(WCHAR ch)
1174 return ((ch >= 'A' && ch <= 'Z') ||
1175 (ch >= 'a' && ch <= 'z') ||
1176 (ch >= '0' && ch <= '9') ||
1177 (ch == '.') || (ch == '_') ||
1178 (ch == '-'));
1181 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1182 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1184 WCHAR *start = reader_get_ptr(reader), *ptr;
1185 xml_encoding enc;
1186 int len;
1188 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1189 return WC_E_ENCNAME;
1191 val->start = reader_get_cur(reader);
1193 ptr = start;
1194 while (is_wchar_encname(*++ptr))
1197 len = ptr - start;
1198 enc = parse_encoding_name(start, len);
1199 TRACE("encoding name %s\n", debugstr_wn(start, len));
1200 val->str = start;
1201 val->len = len;
1203 if (enc == XmlEncoding_Unknown)
1204 return WC_E_ENCNAME;
1206 /* skip encoding name */
1207 reader_skipn(reader, len);
1208 return S_OK;
1211 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1212 static HRESULT reader_parse_encdecl(xmlreader *reader)
1214 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1215 struct reader_position position;
1216 strval name, val;
1217 HRESULT hr;
1219 if (!reader_skipspaces(reader)) return S_FALSE;
1221 position = reader->position;
1222 if (reader_cmp(reader, encodingW)) return S_FALSE;
1223 name.str = reader_get_ptr(reader);
1224 name.start = reader_get_cur(reader);
1225 name.len = 8;
1226 /* skip 'encoding' */
1227 reader_skipn(reader, 8);
1229 hr = reader_parse_eq(reader);
1230 if (FAILED(hr)) return hr;
1232 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1233 return WC_E_QUOTE;
1234 /* skip "'"|'"' */
1235 reader_skipn(reader, 1);
1237 hr = reader_parse_encname(reader, &val);
1238 if (FAILED(hr)) return hr;
1240 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1241 return WC_E_QUOTE;
1243 /* skip "'"|'"' */
1244 reader_skipn(reader, 1);
1246 return reader_add_attr(reader, NULL, &name, NULL, &val, &position);
1249 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1250 static HRESULT reader_parse_sddecl(xmlreader *reader)
1252 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1253 static const WCHAR yesW[] = {'y','e','s',0};
1254 static const WCHAR noW[] = {'n','o',0};
1255 struct reader_position position;
1256 strval name, val;
1257 UINT start;
1258 HRESULT hr;
1260 if (!reader_skipspaces(reader)) return S_FALSE;
1262 position = reader->position;
1263 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1264 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1265 /* skip 'standalone' */
1266 reader_skipn(reader, 10);
1268 hr = reader_parse_eq(reader);
1269 if (FAILED(hr)) return hr;
1271 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1272 return WC_E_QUOTE;
1273 /* skip "'"|'"' */
1274 reader_skipn(reader, 1);
1276 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1277 return WC_E_XMLDECL;
1279 start = reader_get_cur(reader);
1280 /* skip 'yes'|'no' */
1281 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1282 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1283 TRACE("standalone=%s\n", debug_strval(reader, &val));
1285 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1286 return WC_E_QUOTE;
1287 /* skip "'"|'"' */
1288 reader_skipn(reader, 1);
1290 return reader_add_attr(reader, NULL, &name, NULL, &val, &position);
1293 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1294 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1296 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1297 static const WCHAR declcloseW[] = {'?','>',0};
1298 struct reader_position position;
1299 HRESULT hr;
1301 /* check if we have "<?xml " */
1302 if (reader_cmp(reader, xmldeclW))
1303 return S_FALSE;
1305 reader_skipn(reader, 2);
1306 position = reader->position;
1307 reader_skipn(reader, 3);
1308 hr = reader_parse_versioninfo(reader);
1309 if (FAILED(hr))
1310 return hr;
1312 hr = reader_parse_encdecl(reader);
1313 if (FAILED(hr))
1314 return hr;
1316 hr = reader_parse_sddecl(reader);
1317 if (FAILED(hr))
1318 return hr;
1320 reader_skipspaces(reader);
1321 if (reader_cmp(reader, declcloseW))
1322 return WC_E_XMLDECL;
1324 /* skip '?>' */
1325 reader_skipn(reader, 2);
1327 reader->nodetype = XmlNodeType_XmlDeclaration;
1328 reader->empty_element.position = position;
1329 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1330 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1332 return S_OK;
1335 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1336 static HRESULT reader_parse_comment(xmlreader *reader)
1338 WCHAR *ptr;
1339 UINT start;
1341 if (reader->resumestate == XmlReadResumeState_Comment)
1343 start = reader->resume[XmlReadResume_Body];
1344 ptr = reader_get_ptr(reader);
1346 else
1348 /* skip '<!--' */
1349 reader_skipn(reader, 4);
1350 reader_shrink(reader);
1351 ptr = reader_get_ptr(reader);
1352 start = reader_get_cur(reader);
1353 reader->nodetype = XmlNodeType_Comment;
1354 reader->resume[XmlReadResume_Body] = start;
1355 reader->resumestate = XmlReadResumeState_Comment;
1356 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1357 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1358 reader_set_strvalue(reader, StringValue_Value, NULL);
1361 /* will exit when there's no more data, it won't attempt to
1362 read more from stream */
1363 while (*ptr)
1365 if (ptr[0] == '-')
1367 if (ptr[1] == '-')
1369 if (ptr[2] == '>')
1371 strval value;
1373 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1374 TRACE("%s\n", debug_strval(reader, &value));
1376 /* skip rest of markup '->' */
1377 reader_skipn(reader, 3);
1379 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1380 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1381 reader_set_strvalue(reader, StringValue_Value, &value);
1382 reader->resume[XmlReadResume_Body] = 0;
1383 reader->resumestate = XmlReadResumeState_Initial;
1384 return S_OK;
1386 else
1387 return WC_E_COMMENT;
1391 reader_skipn(reader, 1);
1392 ptr++;
1395 return S_OK;
1398 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1399 static inline BOOL is_char(WCHAR ch)
1401 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1402 (ch >= 0x20 && ch <= 0xd7ff) ||
1403 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1404 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1405 (ch >= 0xe000 && ch <= 0xfffd);
1408 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1409 static inline BOOL is_pubchar(WCHAR ch)
1411 return (ch == ' ') ||
1412 (ch >= 'a' && ch <= 'z') ||
1413 (ch >= 'A' && ch <= 'Z') ||
1414 (ch >= '0' && ch <= '9') ||
1415 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1416 (ch == '=') || (ch == '?') ||
1417 (ch == '@') || (ch == '!') ||
1418 (ch >= '#' && ch <= '%') || /* #$% */
1419 (ch == '_') || (ch == '\r') || (ch == '\n');
1422 static inline BOOL is_namestartchar(WCHAR ch)
1424 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1425 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1426 (ch >= 0xc0 && ch <= 0xd6) ||
1427 (ch >= 0xd8 && ch <= 0xf6) ||
1428 (ch >= 0xf8 && ch <= 0x2ff) ||
1429 (ch >= 0x370 && ch <= 0x37d) ||
1430 (ch >= 0x37f && ch <= 0x1fff) ||
1431 (ch >= 0x200c && ch <= 0x200d) ||
1432 (ch >= 0x2070 && ch <= 0x218f) ||
1433 (ch >= 0x2c00 && ch <= 0x2fef) ||
1434 (ch >= 0x3001 && ch <= 0xd7ff) ||
1435 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1436 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1437 (ch >= 0xf900 && ch <= 0xfdcf) ||
1438 (ch >= 0xfdf0 && ch <= 0xfffd);
1441 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1442 static inline BOOL is_ncnamechar(WCHAR ch)
1444 return (ch >= 'A' && ch <= 'Z') ||
1445 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1446 (ch == '-') || (ch == '.') ||
1447 (ch >= '0' && ch <= '9') ||
1448 (ch == 0xb7) ||
1449 (ch >= 0xc0 && ch <= 0xd6) ||
1450 (ch >= 0xd8 && ch <= 0xf6) ||
1451 (ch >= 0xf8 && ch <= 0x2ff) ||
1452 (ch >= 0x300 && ch <= 0x36f) ||
1453 (ch >= 0x370 && ch <= 0x37d) ||
1454 (ch >= 0x37f && ch <= 0x1fff) ||
1455 (ch >= 0x200c && ch <= 0x200d) ||
1456 (ch >= 0x203f && ch <= 0x2040) ||
1457 (ch >= 0x2070 && ch <= 0x218f) ||
1458 (ch >= 0x2c00 && ch <= 0x2fef) ||
1459 (ch >= 0x3001 && ch <= 0xd7ff) ||
1460 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1461 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1462 (ch >= 0xf900 && ch <= 0xfdcf) ||
1463 (ch >= 0xfdf0 && ch <= 0xfffd);
1466 static inline BOOL is_namechar(WCHAR ch)
1468 return (ch == ':') || is_ncnamechar(ch);
1471 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1473 /* When we're on attribute always return attribute type, container node type is kept.
1474 Note that container is not necessarily an element, and attribute doesn't mean it's
1475 an attribute in XML spec terms. */
1476 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1479 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1480 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1481 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1482 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1483 [5] Name ::= NameStartChar (NameChar)* */
1484 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1486 WCHAR *ptr;
1487 UINT start;
1489 if (reader->resume[XmlReadResume_Name])
1491 start = reader->resume[XmlReadResume_Name];
1492 ptr = reader_get_ptr(reader);
1494 else
1496 ptr = reader_get_ptr(reader);
1497 start = reader_get_cur(reader);
1498 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1501 while (is_namechar(*ptr))
1503 reader_skipn(reader, 1);
1504 ptr = reader_get_ptr(reader);
1507 if (is_reader_pending(reader))
1509 reader->resume[XmlReadResume_Name] = start;
1510 return E_PENDING;
1512 else
1513 reader->resume[XmlReadResume_Name] = 0;
1515 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1516 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1518 return S_OK;
1521 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1522 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1524 static const WCHAR xmlW[] = {'x','m','l'};
1525 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1526 strval name;
1527 WCHAR *ptr;
1528 HRESULT hr;
1529 UINT i;
1531 hr = reader_parse_name(reader, &name);
1532 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1534 /* now that we got name check for illegal content */
1535 if (strval_eq(reader, &name, &xmlval))
1536 return WC_E_LEADINGXML;
1538 /* PITarget can't be a qualified name */
1539 ptr = reader_get_strptr(reader, &name);
1540 for (i = 0; i < name.len; i++)
1541 if (ptr[i] == ':')
1542 return i ? NC_E_NAMECOLON : WC_E_PI;
1544 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1545 *target = name;
1546 return S_OK;
1549 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1550 static HRESULT reader_parse_pi(xmlreader *reader)
1552 strval target;
1553 WCHAR *ptr;
1554 UINT start;
1555 HRESULT hr;
1557 switch (reader->resumestate)
1559 case XmlReadResumeState_Initial:
1560 /* skip '<?' */
1561 reader_skipn(reader, 2);
1562 reader_shrink(reader);
1563 reader->resumestate = XmlReadResumeState_PITarget;
1564 case XmlReadResumeState_PITarget:
1565 hr = reader_parse_pitarget(reader, &target);
1566 if (FAILED(hr)) return hr;
1567 reader_set_strvalue(reader, StringValue_LocalName, &target);
1568 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1569 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1570 reader->resumestate = XmlReadResumeState_PIBody;
1571 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1572 default:
1576 start = reader->resume[XmlReadResume_Body];
1577 ptr = reader_get_ptr(reader);
1578 while (*ptr)
1580 if (ptr[0] == '?')
1582 if (ptr[1] == '>')
1584 UINT cur = reader_get_cur(reader);
1585 strval value;
1587 /* strip all leading whitespace chars */
1588 while (start < cur)
1590 ptr = reader_get_ptr2(reader, start);
1591 if (!is_wchar_space(*ptr)) break;
1592 start++;
1595 reader_init_strvalue(start, cur-start, &value);
1597 /* skip '?>' */
1598 reader_skipn(reader, 2);
1599 TRACE("%s\n", debug_strval(reader, &value));
1600 reader->nodetype = XmlNodeType_ProcessingInstruction;
1601 reader->resumestate = XmlReadResumeState_Initial;
1602 reader->resume[XmlReadResume_Body] = 0;
1603 reader_set_strvalue(reader, StringValue_Value, &value);
1604 return S_OK;
1608 reader_skipn(reader, 1);
1609 ptr = reader_get_ptr(reader);
1612 return S_OK;
1615 /* This one is used to parse significant whitespace nodes, like in Misc production */
1616 static HRESULT reader_parse_whitespace(xmlreader *reader)
1618 switch (reader->resumestate)
1620 case XmlReadResumeState_Initial:
1621 reader_shrink(reader);
1622 reader->resumestate = XmlReadResumeState_Whitespace;
1623 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1624 reader->nodetype = XmlNodeType_Whitespace;
1625 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1626 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1627 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1628 /* fallthrough */
1629 case XmlReadResumeState_Whitespace:
1631 strval value;
1632 UINT start;
1634 reader_skipspaces(reader);
1635 if (is_reader_pending(reader)) return S_OK;
1637 start = reader->resume[XmlReadResume_Body];
1638 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1639 reader_set_strvalue(reader, StringValue_Value, &value);
1640 TRACE("%s\n", debug_strval(reader, &value));
1641 reader->resumestate = XmlReadResumeState_Initial;
1643 default:
1647 return S_OK;
1650 /* [27] Misc ::= Comment | PI | S */
1651 static HRESULT reader_parse_misc(xmlreader *reader)
1653 HRESULT hr = S_FALSE;
1655 if (reader->resumestate != XmlReadResumeState_Initial)
1657 hr = reader_more(reader);
1658 if (FAILED(hr)) return hr;
1660 /* finish current node */
1661 switch (reader->resumestate)
1663 case XmlReadResumeState_PITarget:
1664 case XmlReadResumeState_PIBody:
1665 return reader_parse_pi(reader);
1666 case XmlReadResumeState_Comment:
1667 return reader_parse_comment(reader);
1668 case XmlReadResumeState_Whitespace:
1669 return reader_parse_whitespace(reader);
1670 default:
1671 ERR("unknown resume state %d\n", reader->resumestate);
1675 while (1)
1677 const WCHAR *cur = reader_get_ptr(reader);
1679 if (is_wchar_space(*cur))
1680 hr = reader_parse_whitespace(reader);
1681 else if (!reader_cmp(reader, commentW))
1682 hr = reader_parse_comment(reader);
1683 else if (!reader_cmp(reader, piW))
1684 hr = reader_parse_pi(reader);
1685 else
1686 break;
1688 if (hr != S_FALSE) return hr;
1691 return hr;
1694 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1695 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1697 WCHAR *cur = reader_get_ptr(reader), quote;
1698 UINT start;
1700 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1702 quote = *cur;
1703 reader_skipn(reader, 1);
1705 cur = reader_get_ptr(reader);
1706 start = reader_get_cur(reader);
1707 while (is_char(*cur) && *cur != quote)
1709 reader_skipn(reader, 1);
1710 cur = reader_get_ptr(reader);
1712 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1713 if (*cur == quote) reader_skipn(reader, 1);
1715 TRACE("%s\n", debug_strval(reader, literal));
1716 return S_OK;
1719 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1720 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1721 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1723 WCHAR *cur = reader_get_ptr(reader), quote;
1724 UINT start;
1726 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1728 quote = *cur;
1729 reader_skipn(reader, 1);
1731 start = reader_get_cur(reader);
1732 cur = reader_get_ptr(reader);
1733 while (is_pubchar(*cur) && *cur != quote)
1735 reader_skipn(reader, 1);
1736 cur = reader_get_ptr(reader);
1738 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1739 if (*cur == quote) reader_skipn(reader, 1);
1741 TRACE("%s\n", debug_strval(reader, literal));
1742 return S_OK;
1745 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1746 static HRESULT reader_parse_externalid(xmlreader *reader)
1748 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1749 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1750 struct reader_position position = reader->position;
1751 strval name, sys;
1752 HRESULT hr;
1753 int cnt;
1755 if (!reader_cmp(reader, publicW)) {
1756 strval pub;
1758 /* public id */
1759 reader_skipn(reader, 6);
1760 cnt = reader_skipspaces(reader);
1761 if (!cnt) return WC_E_WHITESPACE;
1763 hr = reader_parse_pub_literal(reader, &pub);
1764 if (FAILED(hr)) return hr;
1766 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1767 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position);
1768 if (FAILED(hr)) return hr;
1770 cnt = reader_skipspaces(reader);
1771 if (!cnt) return S_OK;
1773 /* optional system id */
1774 hr = reader_parse_sys_literal(reader, &sys);
1775 if (FAILED(hr)) return S_OK;
1777 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1778 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position);
1779 if (FAILED(hr)) return hr;
1781 return S_OK;
1782 } else if (!reader_cmp(reader, systemW)) {
1783 /* system id */
1784 reader_skipn(reader, 6);
1785 cnt = reader_skipspaces(reader);
1786 if (!cnt) return WC_E_WHITESPACE;
1788 hr = reader_parse_sys_literal(reader, &sys);
1789 if (FAILED(hr)) return hr;
1791 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1792 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position);
1795 return S_FALSE;
1798 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1799 static HRESULT reader_parse_dtd(xmlreader *reader)
1801 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1802 strval name;
1803 WCHAR *cur;
1804 HRESULT hr;
1806 /* check if we have "<!DOCTYPE" */
1807 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1808 reader_shrink(reader);
1810 /* DTD processing is not allowed by default */
1811 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1813 reader_skipn(reader, 9);
1814 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1816 /* name */
1817 hr = reader_parse_name(reader, &name);
1818 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1820 reader_skipspaces(reader);
1822 hr = reader_parse_externalid(reader);
1823 if (FAILED(hr)) return hr;
1825 reader_skipspaces(reader);
1827 cur = reader_get_ptr(reader);
1828 if (*cur != '>')
1830 FIXME("internal subset parsing not implemented\n");
1831 return E_NOTIMPL;
1834 /* skip '>' */
1835 reader_skipn(reader, 1);
1837 reader->nodetype = XmlNodeType_DocumentType;
1838 reader_set_strvalue(reader, StringValue_LocalName, &name);
1839 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1841 return S_OK;
1844 /* [11 NS] LocalPart ::= NCName */
1845 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1847 WCHAR *ptr;
1848 UINT start;
1850 if (reader->resume[XmlReadResume_Local])
1852 start = reader->resume[XmlReadResume_Local];
1853 ptr = reader_get_ptr(reader);
1855 else
1857 ptr = reader_get_ptr(reader);
1858 start = reader_get_cur(reader);
1861 while (is_ncnamechar(*ptr))
1863 reader_skipn(reader, 1);
1864 ptr = reader_get_ptr(reader);
1867 if (is_reader_pending(reader))
1869 reader->resume[XmlReadResume_Local] = start;
1870 return E_PENDING;
1872 else
1873 reader->resume[XmlReadResume_Local] = 0;
1875 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1877 return S_OK;
1880 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1881 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1882 [9 NS] UnprefixedName ::= LocalPart
1883 [10 NS] Prefix ::= NCName */
1884 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1886 WCHAR *ptr;
1887 UINT start;
1888 HRESULT hr;
1890 if (reader->resume[XmlReadResume_Name])
1892 start = reader->resume[XmlReadResume_Name];
1893 ptr = reader_get_ptr(reader);
1895 else
1897 ptr = reader_get_ptr(reader);
1898 start = reader_get_cur(reader);
1899 reader->resume[XmlReadResume_Name] = start;
1900 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1903 if (reader->resume[XmlReadResume_Local])
1905 hr = reader_parse_local(reader, local);
1906 if (FAILED(hr)) return hr;
1908 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1909 local->start - reader->resume[XmlReadResume_Name] - 1,
1910 prefix);
1912 else
1914 /* skip prefix part */
1915 while (is_ncnamechar(*ptr))
1917 reader_skipn(reader, 1);
1918 ptr = reader_get_ptr(reader);
1921 if (is_reader_pending(reader)) return E_PENDING;
1923 /* got a qualified name */
1924 if (*ptr == ':')
1926 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1928 /* skip ':' */
1929 reader_skipn(reader, 1);
1930 hr = reader_parse_local(reader, local);
1931 if (FAILED(hr)) return hr;
1933 else
1935 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1936 reader_init_strvalue(0, 0, prefix);
1940 if (prefix->len)
1941 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1942 else
1943 TRACE("ncname %s\n", debug_strval(reader, local));
1945 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1946 /* count ':' too */
1947 (prefix->len ? prefix->len + 1 : 0) + local->len,
1948 qname);
1950 reader->resume[XmlReadResume_Name] = 0;
1951 reader->resume[XmlReadResume_Local] = 0;
1953 return S_OK;
1956 /* Applies normalization rules to a single char, used for attribute values.
1958 Rules include 2 steps:
1960 1) replacing \r\n with a single \n;
1961 2) replacing all whitespace chars with ' '.
1964 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1966 encoded_buffer *buffer = &reader->input->buffer->utf16;
1968 if (!is_wchar_space(*ptr)) return;
1970 if (*ptr == '\r' && *(ptr+1) == '\n')
1972 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1973 memmove(ptr+1, ptr+2, len);
1975 *ptr = ' ';
1978 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1980 static const WCHAR entltW[] = {'l','t'};
1981 static const WCHAR entgtW[] = {'g','t'};
1982 static const WCHAR entampW[] = {'a','m','p'};
1983 static const WCHAR entaposW[] = {'a','p','o','s'};
1984 static const WCHAR entquotW[] = {'q','u','o','t'};
1985 static const strval lt = { (WCHAR*)entltW, 2 };
1986 static const strval gt = { (WCHAR*)entgtW, 2 };
1987 static const strval amp = { (WCHAR*)entampW, 3 };
1988 static const strval apos = { (WCHAR*)entaposW, 4 };
1989 static const strval quot = { (WCHAR*)entquotW, 4 };
1990 WCHAR *str = reader_get_strptr(reader, name);
1992 switch (*str)
1994 case 'l':
1995 if (strval_eq(reader, name, &lt)) return '<';
1996 break;
1997 case 'g':
1998 if (strval_eq(reader, name, &gt)) return '>';
1999 break;
2000 case 'a':
2001 if (strval_eq(reader, name, &amp))
2002 return '&';
2003 else if (strval_eq(reader, name, &apos))
2004 return '\'';
2005 break;
2006 case 'q':
2007 if (strval_eq(reader, name, &quot)) return '\"';
2008 break;
2009 default:
2013 return 0;
2016 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2017 [67] Reference ::= EntityRef | CharRef
2018 [68] EntityRef ::= '&' Name ';' */
2019 static HRESULT reader_parse_reference(xmlreader *reader)
2021 encoded_buffer *buffer = &reader->input->buffer->utf16;
2022 WCHAR *start = reader_get_ptr(reader), *ptr;
2023 UINT cur = reader_get_cur(reader);
2024 WCHAR ch = 0;
2025 int len;
2027 /* skip '&' */
2028 reader_skipn(reader, 1);
2029 ptr = reader_get_ptr(reader);
2031 if (*ptr == '#')
2033 reader_skipn(reader, 1);
2034 ptr = reader_get_ptr(reader);
2036 /* hex char or decimal */
2037 if (*ptr == 'x')
2039 reader_skipn(reader, 1);
2040 ptr = reader_get_ptr(reader);
2042 while (*ptr != ';')
2044 if ((*ptr >= '0' && *ptr <= '9'))
2045 ch = ch*16 + *ptr - '0';
2046 else if ((*ptr >= 'a' && *ptr <= 'f'))
2047 ch = ch*16 + *ptr - 'a' + 10;
2048 else if ((*ptr >= 'A' && *ptr <= 'F'))
2049 ch = ch*16 + *ptr - 'A' + 10;
2050 else
2051 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2052 reader_skipn(reader, 1);
2053 ptr = reader_get_ptr(reader);
2056 else
2058 while (*ptr != ';')
2060 if ((*ptr >= '0' && *ptr <= '9'))
2062 ch = ch*10 + *ptr - '0';
2063 reader_skipn(reader, 1);
2064 ptr = reader_get_ptr(reader);
2066 else
2067 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2071 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2073 /* normalize */
2074 if (is_wchar_space(ch)) ch = ' ';
2076 ptr = reader_get_ptr(reader);
2077 start = reader_get_ptr2(reader, cur);
2078 len = buffer->written - ((char *)ptr - buffer->data);
2079 memmove(start + 1, ptr + 1, len);
2081 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2082 buffer->cur = cur + 1;
2084 *start = ch;
2086 else
2088 strval name;
2089 HRESULT hr;
2091 hr = reader_parse_name(reader, &name);
2092 if (FAILED(hr)) return hr;
2094 ptr = reader_get_ptr(reader);
2095 if (*ptr != ';') return WC_E_SEMICOLON;
2097 /* predefined entities resolve to a single character */
2098 ch = get_predefined_entity(reader, &name);
2099 if (ch)
2101 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2102 memmove(start+1, ptr+1, len);
2103 buffer->cur = cur + 1;
2105 *start = ch;
2107 else
2109 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2110 return WC_E_UNDECLAREDENTITY;
2115 return S_OK;
2118 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2119 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2121 WCHAR *ptr, quote;
2122 UINT start;
2124 ptr = reader_get_ptr(reader);
2126 /* skip opening quote */
2127 quote = *ptr;
2128 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2129 reader_skipn(reader, 1);
2131 ptr = reader_get_ptr(reader);
2132 start = reader_get_cur(reader);
2133 while (*ptr)
2135 if (*ptr == '<') return WC_E_LESSTHAN;
2137 if (*ptr == quote)
2139 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2140 /* skip closing quote */
2141 reader_skipn(reader, 1);
2142 return S_OK;
2145 if (*ptr == '&')
2147 HRESULT hr = reader_parse_reference(reader);
2148 if (FAILED(hr)) return hr;
2150 else
2152 reader_normalize_space(reader, ptr);
2153 reader_skipn(reader, 1);
2155 ptr = reader_get_ptr(reader);
2158 return WC_E_QUOTE;
2161 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2162 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2163 [3 NS] DefaultAttName ::= 'xmlns'
2164 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2165 static HRESULT reader_parse_attribute(xmlreader *reader)
2167 struct reader_position position = reader->position;
2168 strval prefix, local, qname, value;
2169 BOOL ns = FALSE, nsdef = FALSE;
2170 HRESULT hr;
2172 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2173 if (FAILED(hr)) return hr;
2175 if (strval_eq(reader, &prefix, &strval_xmlns))
2176 ns = TRUE;
2178 if (strval_eq(reader, &qname, &strval_xmlns))
2179 ns = nsdef = TRUE;
2181 hr = reader_parse_eq(reader);
2182 if (FAILED(hr)) return hr;
2184 hr = reader_parse_attvalue(reader, &value);
2185 if (FAILED(hr)) return hr;
2187 if (ns)
2188 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2190 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2191 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position);
2194 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2195 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2196 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2198 struct reader_position position = reader->position;
2199 HRESULT hr;
2201 hr = reader_parse_qname(reader, prefix, local, qname);
2202 if (FAILED(hr)) return hr;
2204 for (;;)
2206 static const WCHAR endW[] = {'/','>',0};
2208 reader_skipspaces(reader);
2210 /* empty element */
2211 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2213 /* skip '/>' */
2214 reader_skipn(reader, 2);
2215 reader->empty_element.prefix = *prefix;
2216 reader->empty_element.localname = *local;
2217 reader->empty_element.qname = *qname;
2218 reader->empty_element.position = position;
2219 reader_mark_ns_nodes(reader, &reader->empty_element);
2220 return S_OK;
2223 /* got a start tag */
2224 if (!reader_cmp(reader, gtW))
2226 /* skip '>' */
2227 reader_skipn(reader, 1);
2228 return reader_push_element(reader, prefix, local, qname, &position);
2231 hr = reader_parse_attribute(reader);
2232 if (FAILED(hr)) return hr;
2235 return S_OK;
2238 /* [39] element ::= EmptyElemTag | STag content ETag */
2239 static HRESULT reader_parse_element(xmlreader *reader)
2241 HRESULT hr;
2243 switch (reader->resumestate)
2245 case XmlReadResumeState_Initial:
2246 /* check if we are really on element */
2247 if (reader_cmp(reader, ltW)) return S_FALSE;
2249 /* skip '<' */
2250 reader_skipn(reader, 1);
2252 reader_shrink(reader);
2253 reader->resumestate = XmlReadResumeState_STag;
2254 case XmlReadResumeState_STag:
2256 strval qname, prefix, local;
2258 /* this handles empty elements too */
2259 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2260 if (FAILED(hr)) return hr;
2262 /* FIXME: need to check for defined namespace to reject invalid prefix */
2264 /* if we got empty element and stack is empty go straight to Misc */
2265 if (reader->is_empty_element && list_empty(&reader->elements))
2266 reader->instate = XmlReadInState_MiscEnd;
2267 else
2268 reader->instate = XmlReadInState_Content;
2270 reader->nodetype = XmlNodeType_Element;
2271 reader->resumestate = XmlReadResumeState_Initial;
2272 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2273 reader_set_strvalue(reader, StringValue_LocalName, &local);
2274 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2275 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2276 break;
2278 default:
2279 hr = E_FAIL;
2282 return hr;
2285 /* [13 NS] ETag ::= '</' QName S? '>' */
2286 static HRESULT reader_parse_endtag(xmlreader *reader)
2288 struct reader_position position;
2289 strval prefix, local, qname;
2290 struct element *element;
2291 HRESULT hr;
2293 /* skip '</' */
2294 reader_skipn(reader, 2);
2296 position = reader->position;
2297 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2298 if (FAILED(hr)) return hr;
2300 reader_skipspaces(reader);
2302 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2304 /* skip '>' */
2305 reader_skipn(reader, 1);
2307 /* Element stack should never be empty at this point, cause we shouldn't get to
2308 content parsing if it's empty. */
2309 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2310 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2312 /* update position stored for start tag, we won't be using it */
2313 element->position = position;
2315 reader->nodetype = XmlNodeType_EndElement;
2316 reader->is_empty_element = FALSE;
2317 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2319 return S_OK;
2322 /* [18] CDSect ::= CDStart CData CDEnd
2323 [19] CDStart ::= '<![CDATA['
2324 [20] CData ::= (Char* - (Char* ']]>' Char*))
2325 [21] CDEnd ::= ']]>' */
2326 static HRESULT reader_parse_cdata(xmlreader *reader)
2328 WCHAR *ptr;
2329 UINT start;
2331 if (reader->resumestate == XmlReadResumeState_CDATA)
2333 start = reader->resume[XmlReadResume_Body];
2334 ptr = reader_get_ptr(reader);
2336 else
2338 /* skip markup '<![CDATA[' */
2339 reader_skipn(reader, 9);
2340 reader_shrink(reader);
2341 ptr = reader_get_ptr(reader);
2342 start = reader_get_cur(reader);
2343 reader->nodetype = XmlNodeType_CDATA;
2344 reader->resume[XmlReadResume_Body] = start;
2345 reader->resumestate = XmlReadResumeState_CDATA;
2346 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2347 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2348 reader_set_strvalue(reader, StringValue_Value, NULL);
2351 while (*ptr)
2353 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2355 strval value;
2357 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2359 /* skip ']]>' */
2360 reader_skipn(reader, 3);
2361 TRACE("%s\n", debug_strval(reader, &value));
2363 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2364 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2365 reader_set_strvalue(reader, StringValue_Value, &value);
2366 reader->resume[XmlReadResume_Body] = 0;
2367 reader->resumestate = XmlReadResumeState_Initial;
2368 return S_OK;
2370 else
2372 /* Value normalization is not fully implemented, rules are:
2374 - single '\r' -> '\n';
2375 - sequence '\r\n' -> '\n', in this case value length changes;
2377 if (*ptr == '\r') *ptr = '\n';
2378 reader_skipn(reader, 1);
2379 ptr++;
2383 return S_OK;
2386 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2387 static HRESULT reader_parse_chardata(xmlreader *reader)
2389 WCHAR *ptr;
2390 UINT start;
2392 if (reader->resumestate == XmlReadResumeState_CharData)
2394 start = reader->resume[XmlReadResume_Body];
2395 ptr = reader_get_ptr(reader);
2397 else
2399 reader_shrink(reader);
2400 ptr = reader_get_ptr(reader);
2401 start = reader_get_cur(reader);
2402 /* There's no text */
2403 if (!*ptr || *ptr == '<') return S_OK;
2404 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2405 reader->resume[XmlReadResume_Body] = start;
2406 reader->resumestate = XmlReadResumeState_CharData;
2407 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2408 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2409 reader_set_strvalue(reader, StringValue_Value, NULL);
2412 while (*ptr)
2414 static const WCHAR ampW[] = {'&',0};
2416 /* CDATA closing sequence ']]>' is not allowed */
2417 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2418 return WC_E_CDSECTEND;
2420 /* Found next markup part */
2421 if (ptr[0] == '<')
2423 strval value;
2425 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2426 reader_set_strvalue(reader, StringValue_Value, &value);
2427 reader->resume[XmlReadResume_Body] = 0;
2428 reader->resumestate = XmlReadResumeState_Initial;
2429 return S_OK;
2432 /* this covers a case when text has leading whitespace chars */
2433 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2435 if (!reader_cmp(reader, ampW))
2436 reader_parse_reference(reader);
2437 else
2438 reader_skipn(reader, 1);
2440 ptr = reader_get_ptr(reader);
2443 return S_OK;
2446 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2447 static HRESULT reader_parse_content(xmlreader *reader)
2449 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2450 static const WCHAR etagW[] = {'<','/',0};
2452 if (reader->resumestate != XmlReadResumeState_Initial)
2454 switch (reader->resumestate)
2456 case XmlReadResumeState_CDATA:
2457 return reader_parse_cdata(reader);
2458 case XmlReadResumeState_Comment:
2459 return reader_parse_comment(reader);
2460 case XmlReadResumeState_PIBody:
2461 case XmlReadResumeState_PITarget:
2462 return reader_parse_pi(reader);
2463 case XmlReadResumeState_CharData:
2464 return reader_parse_chardata(reader);
2465 default:
2466 ERR("unknown resume state %d\n", reader->resumestate);
2470 reader_shrink(reader);
2472 /* handle end tag here, it indicates end of content as well */
2473 if (!reader_cmp(reader, etagW))
2474 return reader_parse_endtag(reader);
2476 if (!reader_cmp(reader, commentW))
2477 return reader_parse_comment(reader);
2479 if (!reader_cmp(reader, piW))
2480 return reader_parse_pi(reader);
2482 if (!reader_cmp(reader, cdstartW))
2483 return reader_parse_cdata(reader);
2485 if (!reader_cmp(reader, ltW))
2486 return reader_parse_element(reader);
2488 /* what's left must be CharData */
2489 return reader_parse_chardata(reader);
2492 static HRESULT reader_parse_nextnode(xmlreader *reader)
2494 XmlNodeType nodetype = reader_get_nodetype(reader);
2495 HRESULT hr;
2497 if (!is_reader_pending(reader))
2498 reader_clear_attrs(reader);
2500 /* When moving from EndElement or empty element, pop its own namespace definitions */
2501 switch (nodetype)
2503 case XmlNodeType_Attribute:
2504 reader_dec_depth(reader);
2505 /* fallthrough */
2506 case XmlNodeType_Element:
2507 if (reader->is_empty_element)
2508 reader_pop_ns_nodes(reader, &reader->empty_element);
2509 else if (FAILED(hr = reader_inc_depth(reader)))
2510 return hr;
2511 break;
2512 case XmlNodeType_EndElement:
2513 reader_pop_element(reader);
2514 reader_dec_depth(reader);
2515 break;
2516 default:
2520 for (;;)
2522 switch (reader->instate)
2524 /* if it's a first call for a new input we need to detect stream encoding */
2525 case XmlReadInState_Initial:
2527 xml_encoding enc;
2529 hr = readerinput_growraw(reader->input);
2530 if (FAILED(hr)) return hr;
2532 reader->position.line_number = 1;
2533 reader->position.line_position = 1;
2535 /* try to detect encoding by BOM or data and set input code page */
2536 hr = readerinput_detectencoding(reader->input, &enc);
2537 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2538 debugstr_w(xml_encoding_map[enc].name), hr);
2539 if (FAILED(hr)) return hr;
2541 /* always switch first time cause we have to put something in */
2542 readerinput_switchencoding(reader->input, enc);
2544 /* parse xml declaration */
2545 hr = reader_parse_xmldecl(reader);
2546 if (FAILED(hr)) return hr;
2548 readerinput_shrinkraw(reader->input, -1);
2549 reader->instate = XmlReadInState_Misc_DTD;
2550 if (hr == S_OK) return hr;
2552 break;
2553 case XmlReadInState_Misc_DTD:
2554 hr = reader_parse_misc(reader);
2555 if (FAILED(hr)) return hr;
2557 if (hr == S_FALSE)
2558 reader->instate = XmlReadInState_DTD;
2559 else
2560 return hr;
2561 break;
2562 case XmlReadInState_DTD:
2563 hr = reader_parse_dtd(reader);
2564 if (FAILED(hr)) return hr;
2566 if (hr == S_OK)
2568 reader->instate = XmlReadInState_DTD_Misc;
2569 return hr;
2571 else
2572 reader->instate = XmlReadInState_Element;
2573 break;
2574 case XmlReadInState_DTD_Misc:
2575 hr = reader_parse_misc(reader);
2576 if (FAILED(hr)) return hr;
2578 if (hr == S_FALSE)
2579 reader->instate = XmlReadInState_Element;
2580 else
2581 return hr;
2582 break;
2583 case XmlReadInState_Element:
2584 return reader_parse_element(reader);
2585 case XmlReadInState_Content:
2586 return reader_parse_content(reader);
2587 case XmlReadInState_MiscEnd:
2588 hr = reader_parse_misc(reader);
2589 if (FAILED(hr)) return hr;
2591 if (hr == S_FALSE)
2593 reader->instate = XmlReadInState_Eof;
2594 reader->state = XmlReadState_EndOfFile;
2595 reader->nodetype = XmlNodeType_None;
2597 return hr;
2598 case XmlReadInState_Eof:
2599 return S_FALSE;
2600 default:
2601 FIXME("internal state %d not handled\n", reader->instate);
2602 return E_NOTIMPL;
2606 return E_NOTIMPL;
2609 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2611 xmlreader *This = impl_from_IXmlReader(iface);
2613 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2615 if (IsEqualGUID(riid, &IID_IUnknown) ||
2616 IsEqualGUID(riid, &IID_IXmlReader))
2618 *ppvObject = iface;
2620 else
2622 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2623 *ppvObject = NULL;
2624 return E_NOINTERFACE;
2627 IXmlReader_AddRef(iface);
2629 return S_OK;
2632 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2634 xmlreader *This = impl_from_IXmlReader(iface);
2635 ULONG ref = InterlockedIncrement(&This->ref);
2636 TRACE("(%p)->(%d)\n", This, ref);
2637 return ref;
2640 static void reader_clear_ns(xmlreader *reader)
2642 struct ns *ns, *ns2;
2644 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2645 reader_free_strvalued(reader, &ns->prefix);
2646 reader_free_strvalued(reader, &ns->uri);
2647 reader_free(reader, ns);
2650 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2651 reader_free_strvalued(reader, &ns->uri);
2652 reader_free(reader, ns);
2656 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2658 xmlreader *This = impl_from_IXmlReader(iface);
2659 LONG ref = InterlockedDecrement(&This->ref);
2661 TRACE("(%p)->(%d)\n", This, ref);
2663 if (ref == 0)
2665 IMalloc *imalloc = This->imalloc;
2666 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2667 if (This->resolver) IXmlResolver_Release(This->resolver);
2668 if (This->mlang) IUnknown_Release(This->mlang);
2669 reader_clear_attrs(This);
2670 reader_clear_ns(This);
2671 reader_clear_elements(This);
2672 reader_free_strvalues(This);
2673 reader_free(This, This);
2674 if (imalloc) IMalloc_Release(imalloc);
2677 return ref;
2680 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2682 xmlreader *This = impl_from_IXmlReader(iface);
2683 IXmlReaderInput *readerinput;
2684 HRESULT hr;
2686 TRACE("(%p)->(%p)\n", This, input);
2688 if (This->input)
2690 readerinput_release_stream(This->input);
2691 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2692 This->input = NULL;
2695 This->position.line_number = 0;
2696 This->position.line_position = 0;
2697 reader_clear_elements(This);
2698 This->depth = 0;
2699 This->nodetype = XmlNodeType_None;
2700 This->resumestate = XmlReadResumeState_Initial;
2701 memset(This->resume, 0, sizeof(This->resume));
2703 /* just reset current input */
2704 if (!input)
2706 This->state = XmlReadState_Initial;
2707 return S_OK;
2710 /* now try IXmlReaderInput, ISequentialStream, IStream */
2711 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2712 if (hr == S_OK)
2714 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2715 This->input = impl_from_IXmlReaderInput(readerinput);
2716 else
2718 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2719 readerinput, readerinput->lpVtbl);
2720 IUnknown_Release(readerinput);
2721 return E_FAIL;
2726 if (hr != S_OK || !readerinput)
2728 /* create IXmlReaderInput basing on supplied interface */
2729 hr = CreateXmlReaderInputWithEncodingName(input,
2730 This->imalloc, NULL, FALSE, NULL, &readerinput);
2731 if (hr != S_OK) return hr;
2732 This->input = impl_from_IXmlReaderInput(readerinput);
2735 /* set stream for supplied IXmlReaderInput */
2736 hr = readerinput_query_for_stream(This->input);
2737 if (hr == S_OK)
2739 This->state = XmlReadState_Initial;
2740 This->instate = XmlReadInState_Initial;
2743 return hr;
2746 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2748 xmlreader *This = impl_from_IXmlReader(iface);
2750 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2752 if (!value) return E_INVALIDARG;
2754 switch (property)
2756 case XmlReaderProperty_MultiLanguage:
2757 *value = (LONG_PTR)This->mlang;
2758 if (This->mlang)
2759 IUnknown_AddRef(This->mlang);
2760 break;
2761 case XmlReaderProperty_XmlResolver:
2762 *value = (LONG_PTR)This->resolver;
2763 if (This->resolver)
2764 IXmlResolver_AddRef(This->resolver);
2765 break;
2766 case XmlReaderProperty_DtdProcessing:
2767 *value = This->dtdmode;
2768 break;
2769 case XmlReaderProperty_ReadState:
2770 *value = This->state;
2771 break;
2772 case XmlReaderProperty_MaxElementDepth:
2773 *value = This->max_depth;
2774 break;
2775 default:
2776 FIXME("Unimplemented property (%u)\n", property);
2777 return E_NOTIMPL;
2780 return S_OK;
2783 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2785 xmlreader *This = impl_from_IXmlReader(iface);
2787 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2789 switch (property)
2791 case XmlReaderProperty_MultiLanguage:
2792 if (This->mlang)
2793 IUnknown_Release(This->mlang);
2794 This->mlang = (IUnknown*)value;
2795 if (This->mlang)
2796 IUnknown_AddRef(This->mlang);
2797 if (This->mlang)
2798 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2799 break;
2800 case XmlReaderProperty_XmlResolver:
2801 if (This->resolver)
2802 IXmlResolver_Release(This->resolver);
2803 This->resolver = (IXmlResolver*)value;
2804 if (This->resolver)
2805 IXmlResolver_AddRef(This->resolver);
2806 break;
2807 case XmlReaderProperty_DtdProcessing:
2808 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2809 This->dtdmode = value;
2810 break;
2811 case XmlReaderProperty_MaxElementDepth:
2812 This->max_depth = value;
2813 break;
2814 default:
2815 FIXME("Unimplemented property (%u)\n", property);
2816 return E_NOTIMPL;
2819 return S_OK;
2822 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2824 xmlreader *This = impl_from_IXmlReader(iface);
2825 XmlNodeType oldtype = This->nodetype;
2826 HRESULT hr;
2828 TRACE("(%p)->(%p)\n", This, nodetype);
2830 if (This->state == XmlReadState_Closed) return S_FALSE;
2832 hr = reader_parse_nextnode(This);
2833 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2834 This->state = XmlReadState_Interactive;
2836 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2837 if (nodetype)
2838 *nodetype = This->nodetype;
2840 return hr;
2843 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2845 xmlreader *This = impl_from_IXmlReader(iface);
2847 TRACE("(%p)->(%p)\n", This, node_type);
2849 if (!node_type)
2850 return E_INVALIDARG;
2852 *node_type = reader_get_nodetype(This);
2853 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2856 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2858 if (!reader->attr_count)
2859 return S_FALSE;
2861 if (!reader->attr)
2862 reader_inc_depth(reader);
2864 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2865 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2866 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2867 reader_set_strvalue(reader, StringValue_QualifiedName, &reader->attr->qname);
2868 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2870 return S_OK;
2873 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2875 xmlreader *This = impl_from_IXmlReader(iface);
2877 TRACE("(%p)\n", This);
2879 return reader_move_to_first_attribute(This);
2882 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2884 xmlreader *This = impl_from_IXmlReader(iface);
2885 const struct list *next;
2887 TRACE("(%p)\n", This);
2889 if (!This->attr_count) return S_FALSE;
2891 if (!This->attr)
2892 return reader_move_to_first_attribute(This);
2894 next = list_next(&This->attrs, &This->attr->entry);
2895 if (next)
2897 This->attr = LIST_ENTRY(next, struct attribute, entry);
2898 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2899 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2900 reader_set_strvalue(This, StringValue_QualifiedName, &This->attr->qname);
2901 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2904 return next ? S_OK : S_FALSE;
2907 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2908 LPCWSTR local_name,
2909 LPCWSTR namespaceUri)
2911 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2912 return E_NOTIMPL;
2915 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2917 xmlreader *This = impl_from_IXmlReader(iface);
2919 TRACE("(%p)\n", This);
2921 if (!This->attr_count) return S_FALSE;
2923 if (This->attr)
2924 reader_dec_depth(This);
2926 This->attr = NULL;
2928 /* FIXME: support other node types with 'attributes' like DTD */
2929 if (This->is_empty_element) {
2930 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
2931 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2932 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2934 else {
2935 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2936 if (element) {
2937 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
2938 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2939 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2942 reader_set_strvalue(This, StringValue_Value, &strval_empty);
2944 return S_OK;
2947 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2949 xmlreader *This = impl_from_IXmlReader(iface);
2950 XmlNodeType nodetype;
2951 UINT length;
2953 TRACE("(%p)->(%p %p)\n", This, name, len);
2955 if (!len)
2956 len = &length;
2958 switch ((nodetype = reader_get_nodetype(This)))
2960 case XmlNodeType_Element:
2961 case XmlNodeType_EndElement:
2962 /* empty elements are not added to the stack */
2963 if (!This->is_empty_element)
2965 struct element *element;
2967 element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2968 *name = element->qname.str;
2969 *len = element->qname.len;
2970 break;
2972 /* fallthrough */
2973 default:
2974 *name = This->strvalues[StringValue_QualifiedName].str;
2975 *len = This->strvalues[StringValue_QualifiedName].len;
2976 break;
2979 return S_OK;
2982 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2984 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2985 struct ns *ns;
2987 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2988 if (strval_eq(reader, prefix, &ns->prefix))
2989 return ns;
2992 return NULL;
2995 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2997 if (list_empty(&reader->nsdef))
2998 return NULL;
3000 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3003 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3005 xmlreader *This = impl_from_IXmlReader(iface);
3006 const strval *prefix = &This->strvalues[StringValue_Prefix];
3007 XmlNodeType nodetype;
3008 struct ns *ns;
3009 UINT length;
3011 TRACE("(%p %p %p)\n", iface, uri, len);
3013 if (!len)
3014 len = &length;
3016 *uri = NULL;
3017 *len = 0;
3019 switch ((nodetype = reader_get_nodetype(This)))
3021 case XmlNodeType_Attribute:
3023 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3024 '2','0','0','0','/','x','m','l','n','s','/',0};
3025 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3026 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3027 const strval *local = &This->strvalues[StringValue_LocalName];
3029 /* check for reserved prefixes first */
3030 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
3031 strval_eq(This, prefix, &strval_xmlns))
3033 *uri = xmlns_uriW;
3034 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
3036 else if (strval_eq(This, prefix, &strval_xml)) {
3037 *uri = xml_uriW;
3038 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
3041 if (!*uri) {
3042 ns = reader_lookup_ns(This, prefix);
3043 if (ns) {
3044 *uri = ns->uri.str;
3045 *len = ns->uri.len;
3047 else {
3048 *uri = emptyW;
3049 *len = 0;
3053 break;
3054 case XmlNodeType_Element:
3055 case XmlNodeType_EndElement:
3057 ns = reader_lookup_ns(This, prefix);
3059 /* pick top default ns if any */
3060 if (!ns)
3061 ns = reader_lookup_nsdef(This);
3063 if (ns) {
3064 *uri = ns->uri.str;
3065 *len = ns->uri.len;
3067 else {
3068 *uri = emptyW;
3069 *len = 0;
3072 break;
3073 case XmlNodeType_Text:
3074 case XmlNodeType_CDATA:
3075 case XmlNodeType_ProcessingInstruction:
3076 case XmlNodeType_Comment:
3077 case XmlNodeType_Whitespace:
3078 case XmlNodeType_XmlDeclaration:
3079 *uri = emptyW;
3080 *len = 0;
3081 break;
3082 default:
3083 FIXME("Unhandled node type %d\n", nodetype);
3084 return E_NOTIMPL;
3087 return S_OK;
3090 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3092 xmlreader *This = impl_from_IXmlReader(iface);
3093 XmlNodeType nodetype;
3094 UINT length;
3096 TRACE("(%p)->(%p %p)\n", This, name, len);
3098 if (!len)
3099 len = &length;
3101 switch ((nodetype = reader_get_nodetype(This)))
3103 case XmlNodeType_Element:
3104 case XmlNodeType_EndElement:
3105 /* empty elements are not added to the stack */
3106 if (!This->is_empty_element)
3108 struct element *element;
3110 element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3111 *name = element->localname.str;
3112 *len = element->localname.len;
3113 break;
3115 /* fallthrough */
3116 default:
3117 *name = This->strvalues[StringValue_LocalName].str;
3118 *len = This->strvalues[StringValue_LocalName].len;
3119 break;
3122 return S_OK;
3125 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3127 xmlreader *This = impl_from_IXmlReader(iface);
3128 XmlNodeType nodetype;
3129 UINT length;
3131 TRACE("(%p)->(%p %p)\n", This, ret, len);
3133 if (!len)
3134 len = &length;
3136 *ret = emptyW;
3137 *len = 0;
3139 switch ((nodetype = reader_get_nodetype(This)))
3141 case XmlNodeType_Element:
3142 case XmlNodeType_EndElement:
3143 case XmlNodeType_Attribute:
3145 const strval *prefix = &This->strvalues[StringValue_Prefix];
3146 struct ns *ns;
3148 if (strval_eq(This, prefix, &strval_xml))
3150 *ret = xmlW;
3151 *len = 3;
3153 else if (strval_eq(This, prefix, &strval_xmlns))
3155 *ret = xmlnsW;
3156 *len = 5;
3158 else if ((ns = reader_lookup_ns(This, prefix)))
3160 *ret = ns->prefix.str;
3161 *len = ns->prefix.len;
3164 break;
3166 default:
3170 return S_OK;
3173 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3175 xmlreader *reader = impl_from_IXmlReader(iface);
3176 strval *val = &reader->strvalues[StringValue_Value];
3177 UINT length;
3179 TRACE("(%p)->(%p %p)\n", reader, value, len);
3181 *value = NULL;
3182 if (!len)
3183 len = &length;
3185 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3187 XmlNodeType type;
3188 HRESULT hr;
3190 hr = IXmlReader_Read(iface, &type);
3191 if (FAILED(hr)) return hr;
3193 /* return if still pending, partially read values are not reported */
3194 if (is_reader_pending(reader)) return E_PENDING;
3197 switch (reader_get_nodetype(reader))
3199 case XmlNodeType_XmlDeclaration:
3200 *value = emptyW;
3201 *len = 0;
3202 break;
3203 case XmlNodeType_Attribute:
3205 const strval *local = &reader->strvalues[StringValue_LocalName];
3206 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3208 /* For namespace definition attributes return values from namespace list */
3209 if (((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3210 strval_eq(reader, prefix, &strval_xmlns)))
3212 struct ns *ns;
3214 if (!(ns = reader_lookup_ns(reader, local)))
3215 ns = reader_lookup_nsdef(reader);
3217 *value = ns->uri.str;
3218 *len = ns->uri.len;
3219 break;
3222 /* fallthrough */
3223 default:
3224 if (!val->str)
3226 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3227 if (!ptr) return E_OUTOFMEMORY;
3228 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3229 ptr[val->len] = 0;
3230 val->str = ptr;
3232 *value = val->str;
3233 *len = val->len;
3234 break;
3237 return S_OK;
3240 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3242 xmlreader *reader = impl_from_IXmlReader(iface);
3243 strval *val = &reader->strvalues[StringValue_Value];
3244 UINT len;
3246 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3248 /* Value is already allocated, chunked reads are not possible. */
3249 if (val->str) return S_FALSE;
3251 if (val->len)
3253 len = min(chunk_size, val->len);
3254 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3255 val->start += len;
3256 val->len -= len;
3257 if (read) *read = len;
3260 return S_OK;
3263 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3264 LPCWSTR *baseUri,
3265 UINT *baseUri_length)
3267 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3268 return E_NOTIMPL;
3271 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3273 FIXME("(%p): stub\n", iface);
3274 return FALSE;
3277 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3279 xmlreader *This = impl_from_IXmlReader(iface);
3280 TRACE("(%p)\n", This);
3281 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3282 when current node is start tag of an element */
3283 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3286 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3288 xmlreader *This = impl_from_IXmlReader(iface);
3289 const struct element *element;
3291 TRACE("(%p %p)\n", This, line_number);
3293 if (!line_number)
3294 return E_INVALIDARG;
3296 switch (reader_get_nodetype(This))
3298 case XmlNodeType_Element:
3299 case XmlNodeType_EndElement:
3300 if (This->is_empty_element)
3301 element = &This->empty_element;
3302 else
3303 element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3305 *line_number = element->position.line_number;
3306 break;
3307 case XmlNodeType_Attribute:
3308 *line_number = This->attr->position.line_number;
3309 break;
3310 case XmlNodeType_XmlDeclaration:
3311 *line_number = This->empty_element.position.line_number;
3312 break;
3313 default:
3314 *line_number = This->position.line_number;
3315 break;
3318 return S_OK;
3321 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3323 xmlreader *This = impl_from_IXmlReader(iface);
3324 const struct element *element;
3326 TRACE("(%p %p)\n", This, line_position);
3328 if (!line_position)
3329 return E_INVALIDARG;
3331 switch (reader_get_nodetype(This))
3333 case XmlNodeType_Element:
3334 case XmlNodeType_EndElement:
3335 if (This->is_empty_element)
3336 element = &This->empty_element;
3337 else
3338 element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3340 *line_position = element->position.line_position;
3341 break;
3342 case XmlNodeType_Attribute:
3343 *line_position = This->attr->position.line_position;
3344 break;
3345 case XmlNodeType_XmlDeclaration:
3346 *line_position = This->empty_element.position.line_position;
3347 break;
3348 default:
3349 *line_position = This->position.line_position;
3350 break;
3353 return S_OK;
3356 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3358 xmlreader *This = impl_from_IXmlReader(iface);
3360 TRACE("(%p)->(%p)\n", This, count);
3362 if (!count) return E_INVALIDARG;
3364 *count = This->attr_count;
3365 return S_OK;
3368 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3370 xmlreader *This = impl_from_IXmlReader(iface);
3371 TRACE("(%p)->(%p)\n", This, depth);
3372 *depth = This->depth;
3373 return S_OK;
3376 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3378 xmlreader *This = impl_from_IXmlReader(iface);
3379 TRACE("(%p)\n", iface);
3380 return This->state == XmlReadState_EndOfFile;
3383 static const struct IXmlReaderVtbl xmlreader_vtbl =
3385 xmlreader_QueryInterface,
3386 xmlreader_AddRef,
3387 xmlreader_Release,
3388 xmlreader_SetInput,
3389 xmlreader_GetProperty,
3390 xmlreader_SetProperty,
3391 xmlreader_Read,
3392 xmlreader_GetNodeType,
3393 xmlreader_MoveToFirstAttribute,
3394 xmlreader_MoveToNextAttribute,
3395 xmlreader_MoveToAttributeByName,
3396 xmlreader_MoveToElement,
3397 xmlreader_GetQualifiedName,
3398 xmlreader_GetNamespaceUri,
3399 xmlreader_GetLocalName,
3400 xmlreader_GetPrefix,
3401 xmlreader_GetValue,
3402 xmlreader_ReadValueChunk,
3403 xmlreader_GetBaseUri,
3404 xmlreader_IsDefault,
3405 xmlreader_IsEmptyElement,
3406 xmlreader_GetLineNumber,
3407 xmlreader_GetLinePosition,
3408 xmlreader_GetAttributeCount,
3409 xmlreader_GetDepth,
3410 xmlreader_IsEOF
3413 /** IXmlReaderInput **/
3414 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3416 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3418 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3420 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3421 IsEqualGUID(riid, &IID_IUnknown))
3423 *ppvObject = iface;
3425 else
3427 WARN("interface %s not implemented\n", debugstr_guid(riid));
3428 *ppvObject = NULL;
3429 return E_NOINTERFACE;
3432 IUnknown_AddRef(iface);
3434 return S_OK;
3437 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3439 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3440 ULONG ref = InterlockedIncrement(&This->ref);
3441 TRACE("(%p)->(%d)\n", This, ref);
3442 return ref;
3445 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3447 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3448 LONG ref = InterlockedDecrement(&This->ref);
3450 TRACE("(%p)->(%d)\n", This, ref);
3452 if (ref == 0)
3454 IMalloc *imalloc = This->imalloc;
3455 if (This->input) IUnknown_Release(This->input);
3456 if (This->stream) ISequentialStream_Release(This->stream);
3457 if (This->buffer) free_input_buffer(This->buffer);
3458 readerinput_free(This, This->baseuri);
3459 readerinput_free(This, This);
3460 if (imalloc) IMalloc_Release(imalloc);
3463 return ref;
3466 static const struct IUnknownVtbl xmlreaderinputvtbl =
3468 xmlreaderinput_QueryInterface,
3469 xmlreaderinput_AddRef,
3470 xmlreaderinput_Release
3473 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3475 xmlreader *reader;
3476 int i;
3478 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3480 if (!IsEqualGUID(riid, &IID_IXmlReader))
3482 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3483 return E_FAIL;
3486 if (imalloc)
3487 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3488 else
3489 reader = heap_alloc(sizeof(*reader));
3490 if(!reader) return E_OUTOFMEMORY;
3492 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3493 reader->ref = 1;
3494 reader->input = NULL;
3495 reader->state = XmlReadState_Closed;
3496 reader->instate = XmlReadInState_Initial;
3497 reader->resumestate = XmlReadResumeState_Initial;
3498 reader->dtdmode = DtdProcessing_Prohibit;
3499 reader->resolver = NULL;
3500 reader->mlang = NULL;
3501 reader->position.line_number = 0;
3502 reader->position.line_position = 0;
3503 reader->imalloc = imalloc;
3504 if (imalloc) IMalloc_AddRef(imalloc);
3505 reader->nodetype = XmlNodeType_None;
3506 list_init(&reader->attrs);
3507 reader->attr_count = 0;
3508 reader->attr = NULL;
3509 list_init(&reader->nsdef);
3510 list_init(&reader->ns);
3511 list_init(&reader->elements);
3512 reader->depth = 0;
3513 reader->max_depth = 256;
3514 reader->is_empty_element = FALSE;
3515 memset(reader->resume, 0, sizeof(reader->resume));
3517 for (i = 0; i < StringValue_Last; i++)
3518 reader->strvalues[i] = strval_empty;
3520 *obj = &reader->IXmlReader_iface;
3522 TRACE("returning iface %p\n", *obj);
3524 return S_OK;
3527 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3528 IMalloc *imalloc,
3529 LPCWSTR encoding,
3530 BOOL hint,
3531 LPCWSTR base_uri,
3532 IXmlReaderInput **ppInput)
3534 xmlreaderinput *readerinput;
3535 HRESULT hr;
3537 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3538 hint, wine_dbgstr_w(base_uri), ppInput);
3540 if (!stream || !ppInput) return E_INVALIDARG;
3542 if (imalloc)
3543 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3544 else
3545 readerinput = heap_alloc(sizeof(*readerinput));
3546 if(!readerinput) return E_OUTOFMEMORY;
3548 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3549 readerinput->ref = 1;
3550 readerinput->imalloc = imalloc;
3551 readerinput->stream = NULL;
3552 if (imalloc) IMalloc_AddRef(imalloc);
3553 readerinput->encoding = parse_encoding_name(encoding, -1);
3554 readerinput->hint = hint;
3555 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3556 readerinput->pending = 0;
3558 hr = alloc_input_buffer(readerinput);
3559 if (hr != S_OK)
3561 readerinput_free(readerinput, readerinput->baseuri);
3562 readerinput_free(readerinput, readerinput);
3563 if (imalloc) IMalloc_Release(imalloc);
3564 return hr;
3566 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3568 *ppInput = &readerinput->IXmlReaderInput_iface;
3570 TRACE("returning iface %p\n", *ppInput);
3572 return S_OK;