xmllite/reader: Return same string for local and qualified names for attributes when...
[wine.git] / dlls / xmllite / reader.c
blobb28a4b134aa2e8a1d65b249ea7ff5b73a81278d8
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
41 typedef enum
43 XmlReadInState_Initial,
44 XmlReadInState_XmlDecl,
45 XmlReadInState_Misc_DTD,
46 XmlReadInState_DTD,
47 XmlReadInState_DTD_Misc,
48 XmlReadInState_Element,
49 XmlReadInState_Content,
50 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
51 XmlReadInState_Eof
52 } XmlReaderInternalState;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
56 typedef enum
58 XmlReadResumeState_Initial,
59 XmlReadResumeState_PITarget,
60 XmlReadResumeState_PIBody,
61 XmlReadResumeState_CDATA,
62 XmlReadResumeState_Comment,
63 XmlReadResumeState_STag,
64 XmlReadResumeState_CharData,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState;
68 /* saved pointer index to resume from particular input position */
69 typedef enum
71 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local, /* local for QName */
73 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
74 XmlReadResume_Last
75 } XmlReaderResume;
77 typedef enum
79 StringValue_LocalName,
80 StringValue_Prefix,
81 StringValue_QualifiedName,
82 StringValue_Value,
83 StringValue_Last
84 } XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static BOOL is_namestartchar(WCHAR ch);
98 static const char *debugstr_nodetype(XmlNodeType nodetype)
100 static const char * const type_names[] =
102 "None",
103 "Element",
104 "Attribute",
105 "Text",
106 "CDATA",
109 "ProcessingInstruction",
110 "Comment",
112 "DocumentType",
115 "Whitespace",
117 "EndElement",
119 "XmlDeclaration"
122 if (nodetype > _XmlNodeType_Last)
123 return wine_dbg_sprintf("unknown type=%d", nodetype);
125 return type_names[nodetype];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop)
130 static const char * const prop_names[] =
132 "MultiLanguage",
133 "ConformanceLevel",
134 "RandomAccess",
135 "XmlResolver",
136 "DtdProcessing",
137 "ReadState",
138 "MaxElementDepth",
139 "MaxEntityExpansion"
142 if (prop > _XmlReaderProperty_Last)
143 return wine_dbg_sprintf("unknown property=%d", prop);
145 return prop_names[prop];
148 struct xml_encoding_data
150 const WCHAR *name;
151 xml_encoding enc;
152 UINT cp;
155 static const struct xml_encoding_data xml_encoding_map[] = {
156 { utf16W, XmlEncoding_UTF16, ~0 },
157 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
160 const WCHAR *get_encoding_name(xml_encoding encoding)
162 return xml_encoding_map[encoding].name;
165 xml_encoding get_encoding_from_codepage(UINT codepage)
167 int i;
168 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
170 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
172 return XmlEncoding_Unknown;
175 typedef struct
177 char *data;
178 UINT cur;
179 unsigned int allocated;
180 unsigned int written;
181 } encoded_buffer;
183 typedef struct input_buffer input_buffer;
185 typedef struct
187 IXmlReaderInput IXmlReaderInput_iface;
188 LONG ref;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
190 IUnknown *input;
191 IMalloc *imalloc;
192 xml_encoding encoding;
193 BOOL hint;
194 WCHAR *baseuri;
195 /* stream reference set after SetInput() call from reader,
196 stored as sequential stream, cause currently
197 optimizations possible with IStream aren't implemented */
198 ISequentialStream *stream;
199 input_buffer *buffer;
200 unsigned int pending : 1;
201 } xmlreaderinput;
203 static const struct IUnknownVtbl xmlreaderinputvtbl;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
213 typedef struct
215 WCHAR *str; /* allocated null-terminated string */
216 UINT len; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start; /* input position where value starts */
218 } strval;
220 static WCHAR emptyW[] = {0};
221 static WCHAR xmlW[] = {'x','m','l',0};
222 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
223 static const strval strval_empty = { emptyW };
224 static const strval strval_xml = { xmlW, 3 };
225 static const strval strval_xmlns = { xmlnsW, 5 };
227 struct reader_position
229 UINT line_number;
230 UINT line_position;
233 enum attribute_flags
235 ATTRIBUTE_NS_DEFINITION = 0x1,
236 ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
239 struct attribute
241 struct list entry;
242 strval prefix;
243 strval localname;
244 strval qname;
245 strval value;
246 struct reader_position position;
247 unsigned int flags;
250 struct element
252 struct list entry;
253 strval prefix;
254 strval localname;
255 strval qname;
256 struct reader_position position;
259 struct ns
261 struct list entry;
262 strval prefix;
263 strval uri;
264 struct element *element;
267 typedef struct
269 IXmlReader IXmlReader_iface;
270 LONG ref;
271 xmlreaderinput *input;
272 IMalloc *imalloc;
273 XmlReadState state;
274 HRESULT error; /* error set on XmlReadState_Error */
275 XmlReaderInternalState instate;
276 XmlReaderResumeState resumestate;
277 XmlNodeType nodetype;
278 DtdProcessing dtdmode;
279 IXmlResolver *resolver;
280 IUnknown *mlang;
281 struct reader_position position;
282 struct list attrs; /* attributes list for current node */
283 struct attribute *attr; /* current attribute */
284 UINT attr_count;
285 struct list nsdef;
286 struct list ns;
287 struct list elements;
288 strval strvalues[StringValue_Last];
289 UINT depth;
290 UINT max_depth;
291 BOOL is_empty_element;
292 struct element empty_element; /* used for empty elements without end tag <a />,
293 and to keep <?xml reader position */
294 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
295 } xmlreader;
297 struct input_buffer
299 encoded_buffer utf16;
300 encoded_buffer encoded;
301 UINT code_page;
302 xmlreaderinput *input;
305 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
307 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
310 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
312 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
315 /* reader memory allocation functions */
316 static inline void *reader_alloc(xmlreader *reader, size_t len)
318 return m_alloc(reader->imalloc, len);
321 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
323 void *ret = reader_alloc(reader, len);
324 if (ret)
325 memset(ret, 0, len);
326 return ret;
329 static inline void reader_free(xmlreader *reader, void *mem)
331 m_free(reader->imalloc, mem);
334 /* Just return pointer from offset, no attempt to read more. */
335 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
337 encoded_buffer *buffer = &reader->input->buffer->utf16;
338 return (WCHAR*)buffer->data + offset;
341 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
343 return v->str ? v->str : reader_get_ptr2(reader, v->start);
346 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
348 *dest = *src;
350 if (src->str != strval_empty.str)
352 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
353 if (!dest->str) return E_OUTOFMEMORY;
354 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
355 dest->str[dest->len] = 0;
356 dest->start = 0;
359 return S_OK;
362 /* reader input memory allocation functions */
363 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
365 return m_alloc(input->imalloc, len);
368 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
370 return m_realloc(input->imalloc, mem, len);
373 static inline void readerinput_free(xmlreaderinput *input, void *mem)
375 m_free(input->imalloc, mem);
378 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
380 LPWSTR ret = NULL;
382 if(str) {
383 DWORD size;
385 size = (strlenW(str)+1)*sizeof(WCHAR);
386 ret = readerinput_alloc(input, size);
387 if (ret) memcpy(ret, str, size);
390 return ret;
393 static void reader_clear_attrs(xmlreader *reader)
395 struct attribute *attr, *attr2;
396 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
398 reader_free(reader, attr);
400 list_init(&reader->attrs);
401 reader->attr_count = 0;
402 reader->attr = NULL;
405 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
406 while we are on a node with attributes */
407 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
408 strval *value, const struct reader_position *position, unsigned int flags)
410 struct attribute *attr;
412 attr = reader_alloc(reader, sizeof(*attr));
413 if (!attr) return E_OUTOFMEMORY;
415 if (prefix)
416 attr->prefix = *prefix;
417 else
418 memset(&attr->prefix, 0, sizeof(attr->prefix));
419 attr->localname = *localname;
420 attr->qname = qname ? *qname : *localname;
421 attr->value = *value;
422 attr->position = *position;
423 attr->flags = flags;
424 list_add_tail(&reader->attrs, &attr->entry);
425 reader->attr_count++;
427 return S_OK;
430 /* Returns current element, doesn't check if reader is actually positioned on it. */
431 static struct element *reader_get_element(xmlreader *reader)
433 if (reader->is_empty_element)
434 return &reader->empty_element;
436 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
439 /* This one frees stored string value if needed */
440 static void reader_free_strvalued(xmlreader *reader, strval *v)
442 if (v->str != strval_empty.str)
444 reader_free(reader, v->str);
445 *v = strval_empty;
449 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
451 v->start = start;
452 v->len = len;
453 v->str = NULL;
456 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
458 return debugstr_wn(reader_get_strptr(reader, v), v->len);
461 /* used to initialize from constant string */
462 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
464 v->start = 0;
465 v->len = len;
466 v->str = str;
469 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
471 reader_free_strvalued(reader, &reader->strvalues[type]);
474 static void reader_free_strvalues(xmlreader *reader)
476 int type;
477 for (type = 0; type < StringValue_Last; type++)
478 reader_free_strvalue(reader, type);
481 /* This helper should only be used to test if strings are the same,
482 it doesn't try to sort. */
483 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
485 if (str1->len != str2->len) return 0;
486 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
489 static void reader_clear_elements(xmlreader *reader)
491 struct element *elem, *elem2;
492 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
494 reader_free_strvalued(reader, &elem->prefix);
495 reader_free_strvalued(reader, &elem->localname);
496 reader_free_strvalued(reader, &elem->qname);
497 reader_free(reader, elem);
499 list_init(&reader->elements);
500 reader_free_strvalued(reader, &reader->empty_element.localname);
501 reader_free_strvalued(reader, &reader->empty_element.qname);
502 reader->is_empty_element = FALSE;
505 static HRESULT reader_inc_depth(xmlreader *reader)
507 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
510 static void reader_dec_depth(xmlreader *reader)
512 if (reader->depth)
513 reader->depth--;
516 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
518 struct ns *ns;
519 HRESULT hr;
521 ns = reader_alloc(reader, sizeof(*ns));
522 if (!ns) return E_OUTOFMEMORY;
524 if (def)
525 memset(&ns->prefix, 0, sizeof(ns->prefix));
526 else {
527 hr = reader_strvaldup(reader, prefix, &ns->prefix);
528 if (FAILED(hr)) {
529 reader_free(reader, ns);
530 return hr;
534 hr = reader_strvaldup(reader, uri, &ns->uri);
535 if (FAILED(hr)) {
536 reader_free_strvalued(reader, &ns->prefix);
537 reader_free(reader, ns);
538 return hr;
541 ns->element = NULL;
542 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
543 return hr;
546 static void reader_free_element(xmlreader *reader, struct element *element)
548 reader_free_strvalued(reader, &element->prefix);
549 reader_free_strvalued(reader, &element->localname);
550 reader_free_strvalued(reader, &element->qname);
551 reader_free(reader, element);
554 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
556 struct ns *ns;
558 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
559 if (ns->element)
560 break;
561 ns->element = element;
564 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
565 if (ns->element)
566 break;
567 ns->element = element;
571 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
572 strval *qname, const struct reader_position *position)
574 struct element *element;
575 HRESULT hr;
577 element = reader_alloc_zero(reader, sizeof(*element));
578 if (!element)
579 return E_OUTOFMEMORY;
581 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
582 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
583 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
585 list_add_head(&reader->elements, &element->entry);
586 reader_mark_ns_nodes(reader, element);
587 reader->is_empty_element = FALSE;
588 element->position = *position;
590 else
591 reader_free_element(reader, element);
593 return hr;
596 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
598 struct ns *ns, *ns2;
600 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
601 if (ns->element != element)
602 break;
604 list_remove(&ns->entry);
605 reader_free_strvalued(reader, &ns->prefix);
606 reader_free_strvalued(reader, &ns->uri);
607 reader_free(reader, ns);
610 if (!list_empty(&reader->nsdef)) {
611 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
612 if (ns->element == element) {
613 list_remove(&ns->entry);
614 reader_free_strvalued(reader, &ns->prefix);
615 reader_free_strvalued(reader, &ns->uri);
616 reader_free(reader, ns);
621 static void reader_pop_element(xmlreader *reader)
623 struct element *element;
625 if (list_empty(&reader->elements))
626 return;
628 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
629 list_remove(&element->entry);
631 reader_pop_ns_nodes(reader, element);
632 reader_free_element(reader, element);
634 /* It was a root element, the rest is expected as Misc */
635 if (list_empty(&reader->elements))
636 reader->instate = XmlReadInState_MiscEnd;
639 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
640 means node value is to be determined. */
641 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
643 strval *v = &reader->strvalues[type];
645 reader_free_strvalue(reader, type);
646 if (!value)
648 v->str = NULL;
649 v->start = 0;
650 v->len = 0;
651 return;
654 if (value->str == strval_empty.str)
655 *v = *value;
656 else
658 if (type == StringValue_Value)
660 /* defer allocation for value string */
661 v->str = NULL;
662 v->start = value->start;
663 v->len = value->len;
665 else
667 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
668 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
669 v->str[value->len] = 0;
670 v->len = value->len;
675 static inline int is_reader_pending(xmlreader *reader)
677 return reader->input->pending;
680 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
682 const int initial_len = 0x2000;
683 buffer->data = readerinput_alloc(input, initial_len);
684 if (!buffer->data) return E_OUTOFMEMORY;
686 memset(buffer->data, 0, 4);
687 buffer->cur = 0;
688 buffer->allocated = initial_len;
689 buffer->written = 0;
691 return S_OK;
694 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
696 readerinput_free(input, buffer->data);
699 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
701 if (encoding == XmlEncoding_Unknown)
703 FIXME("unsupported encoding %d\n", encoding);
704 return E_NOTIMPL;
707 *cp = xml_encoding_map[encoding].cp;
709 return S_OK;
712 xml_encoding parse_encoding_name(const WCHAR *name, int len)
714 int min, max, n, c;
716 if (!name) return XmlEncoding_Unknown;
718 min = 0;
719 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
721 while (min <= max)
723 n = (min+max)/2;
725 if (len != -1)
726 c = strncmpiW(xml_encoding_map[n].name, name, len);
727 else
728 c = strcmpiW(xml_encoding_map[n].name, name);
729 if (!c)
730 return xml_encoding_map[n].enc;
732 if (c > 0)
733 max = n-1;
734 else
735 min = n+1;
738 return XmlEncoding_Unknown;
741 static HRESULT alloc_input_buffer(xmlreaderinput *input)
743 input_buffer *buffer;
744 HRESULT hr;
746 input->buffer = NULL;
748 buffer = readerinput_alloc(input, sizeof(*buffer));
749 if (!buffer) return E_OUTOFMEMORY;
751 buffer->input = input;
752 buffer->code_page = ~0; /* code page is unknown at this point */
753 hr = init_encoded_buffer(input, &buffer->utf16);
754 if (hr != S_OK) {
755 readerinput_free(input, buffer);
756 return hr;
759 hr = init_encoded_buffer(input, &buffer->encoded);
760 if (hr != S_OK) {
761 free_encoded_buffer(input, &buffer->utf16);
762 readerinput_free(input, buffer);
763 return hr;
766 input->buffer = buffer;
767 return S_OK;
770 static void free_input_buffer(input_buffer *buffer)
772 free_encoded_buffer(buffer->input, &buffer->encoded);
773 free_encoded_buffer(buffer->input, &buffer->utf16);
774 readerinput_free(buffer->input, buffer);
777 static void readerinput_release_stream(xmlreaderinput *readerinput)
779 if (readerinput->stream) {
780 ISequentialStream_Release(readerinput->stream);
781 readerinput->stream = NULL;
785 /* Queries already stored interface for IStream/ISequentialStream.
786 Interface supplied on creation will be overwritten */
787 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
789 HRESULT hr;
791 readerinput_release_stream(readerinput);
792 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
793 if (hr != S_OK)
794 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
796 return hr;
799 /* reads a chunk to raw buffer */
800 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
802 encoded_buffer *buffer = &readerinput->buffer->encoded;
803 /* to make sure aligned length won't exceed allocated length */
804 ULONG len = buffer->allocated - buffer->written - 4;
805 ULONG read;
806 HRESULT hr;
808 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
809 variable width encodings like UTF-8 */
810 len = (len + 3) & ~3;
811 /* try to use allocated space or grow */
812 if (buffer->allocated - buffer->written < len)
814 buffer->allocated *= 2;
815 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
816 len = buffer->allocated - buffer->written;
819 read = 0;
820 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
821 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
822 readerinput->pending = hr == E_PENDING;
823 if (FAILED(hr)) return hr;
824 buffer->written += read;
826 return hr;
829 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
830 static void readerinput_grow(xmlreaderinput *readerinput, int length)
832 encoded_buffer *buffer = &readerinput->buffer->utf16;
834 length *= sizeof(WCHAR);
835 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
836 if (buffer->allocated < buffer->written + length + 4)
838 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
839 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
840 buffer->allocated = grown_size;
844 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
846 static const char startA[] = {'<','?'};
847 static const char commentA[] = {'<','!'};
848 encoded_buffer *buffer = &readerinput->buffer->encoded;
849 unsigned char *ptr = (unsigned char*)buffer->data;
851 return !memcmp(buffer->data, startA, sizeof(startA)) ||
852 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
853 /* test start byte */
854 (ptr[0] == '<' &&
856 (ptr[1] && (ptr[1] <= 0x7f)) ||
857 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
858 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
859 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
863 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
865 encoded_buffer *buffer = &readerinput->buffer->encoded;
866 static const char utf8bom[] = {0xef,0xbb,0xbf};
867 static const char utf16lebom[] = {0xff,0xfe};
868 WCHAR *ptrW;
870 *enc = XmlEncoding_Unknown;
872 if (buffer->written <= 3)
874 HRESULT hr = readerinput_growraw(readerinput);
875 if (FAILED(hr)) return hr;
876 if (buffer->written <= 3) return MX_E_INPUTEND;
879 ptrW = (WCHAR *)buffer->data;
880 /* try start symbols if we have enough data to do that, input buffer should contain
881 first chunk already */
882 if (readerinput_is_utf8(readerinput))
883 *enc = XmlEncoding_UTF8;
884 else if (*ptrW == '<')
886 ptrW++;
887 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
888 *enc = XmlEncoding_UTF16;
890 /* try with BOM now */
891 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
893 buffer->cur += sizeof(utf8bom);
894 *enc = XmlEncoding_UTF8;
896 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
898 buffer->cur += sizeof(utf16lebom);
899 *enc = XmlEncoding_UTF16;
902 return S_OK;
905 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
907 encoded_buffer *buffer = &readerinput->buffer->encoded;
908 int len = buffer->written;
910 /* complete single byte char */
911 if (!(buffer->data[len-1] & 0x80)) return len;
913 /* find start byte of multibyte char */
914 while (--len && !(buffer->data[len] & 0xc0))
917 return len;
920 /* Returns byte length of complete char sequence for buffer code page,
921 it's relative to current buffer position which is currently used for BOM handling
922 only. */
923 static int readerinput_get_convlen(xmlreaderinput *readerinput)
925 encoded_buffer *buffer = &readerinput->buffer->encoded;
926 int len;
928 if (readerinput->buffer->code_page == CP_UTF8)
929 len = readerinput_get_utf8_convlen(readerinput);
930 else
931 len = buffer->written;
933 TRACE("%d\n", len - buffer->cur);
934 return len - buffer->cur;
937 /* It's possible that raw buffer has some leftovers from last conversion - some char
938 sequence that doesn't represent a full code point. Length argument should be calculated with
939 readerinput_get_convlen(), if it's -1 it will be calculated here. */
940 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
942 encoded_buffer *buffer = &readerinput->buffer->encoded;
944 if (len == -1)
945 len = readerinput_get_convlen(readerinput);
947 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
948 /* everything below cur is lost too */
949 buffer->written -= len + buffer->cur;
950 /* after this point we don't need cur offset really,
951 it's used only to mark where actual data begins when first chunk is read */
952 buffer->cur = 0;
955 /* note that raw buffer content is kept */
956 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
958 encoded_buffer *src = &readerinput->buffer->encoded;
959 encoded_buffer *dest = &readerinput->buffer->utf16;
960 int len, dest_len;
961 HRESULT hr;
962 WCHAR *ptr;
963 UINT cp;
965 hr = get_code_page(enc, &cp);
966 if (FAILED(hr)) return;
968 readerinput->buffer->code_page = cp;
969 len = readerinput_get_convlen(readerinput);
971 TRACE("switching to cp %d\n", cp);
973 /* just copy in this case */
974 if (enc == XmlEncoding_UTF16)
976 readerinput_grow(readerinput, len);
977 memcpy(dest->data, src->data + src->cur, len);
978 dest->written += len*sizeof(WCHAR);
979 return;
982 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
983 readerinput_grow(readerinput, dest_len);
984 ptr = (WCHAR*)dest->data;
985 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
986 ptr[dest_len] = 0;
987 dest->written += dest_len*sizeof(WCHAR);
990 /* shrinks parsed data a buffer begins with */
991 static void reader_shrink(xmlreader *reader)
993 encoded_buffer *buffer = &reader->input->buffer->utf16;
995 /* avoid to move too often using threshold shrink length */
996 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
998 buffer->written -= buffer->cur*sizeof(WCHAR);
999 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1000 buffer->cur = 0;
1001 *(WCHAR*)&buffer->data[buffer->written] = 0;
1005 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1006 It won't attempt to shrink but will grow destination buffer if needed */
1007 static HRESULT reader_more(xmlreader *reader)
1009 xmlreaderinput *readerinput = reader->input;
1010 encoded_buffer *src = &readerinput->buffer->encoded;
1011 encoded_buffer *dest = &readerinput->buffer->utf16;
1012 UINT cp = readerinput->buffer->code_page;
1013 int len, dest_len;
1014 HRESULT hr;
1015 WCHAR *ptr;
1017 /* get some raw data from stream first */
1018 hr = readerinput_growraw(readerinput);
1019 len = readerinput_get_convlen(readerinput);
1021 /* just copy for UTF-16 case */
1022 if (cp == ~0)
1024 readerinput_grow(readerinput, len);
1025 memcpy(dest->data + dest->written, src->data + src->cur, len);
1026 dest->written += len*sizeof(WCHAR);
1027 return hr;
1030 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1031 readerinput_grow(readerinput, dest_len);
1032 ptr = (WCHAR*)(dest->data + dest->written);
1033 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1034 ptr[dest_len] = 0;
1035 dest->written += dest_len*sizeof(WCHAR);
1036 /* get rid of processed data */
1037 readerinput_shrinkraw(readerinput, len);
1039 return hr;
1042 static inline UINT reader_get_cur(xmlreader *reader)
1044 return reader->input->buffer->utf16.cur;
1047 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1049 encoded_buffer *buffer = &reader->input->buffer->utf16;
1050 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1051 if (!*ptr) reader_more(reader);
1052 return (WCHAR*)buffer->data + buffer->cur;
1055 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1057 int i=0;
1058 const WCHAR *ptr = reader_get_ptr(reader);
1059 while (str[i])
1061 if (!ptr[i])
1063 reader_more(reader);
1064 ptr = reader_get_ptr(reader);
1066 if (str[i] != ptr[i])
1067 return ptr[i] - str[i];
1068 i++;
1070 return 0;
1073 static void reader_update_position(xmlreader *reader, WCHAR ch)
1075 if (ch == '\r')
1076 reader->position.line_position = 1;
1077 else if (ch == '\n')
1079 reader->position.line_number++;
1080 reader->position.line_position = 1;
1082 else
1083 reader->position.line_position++;
1086 /* moves cursor n WCHARs forward */
1087 static void reader_skipn(xmlreader *reader, int n)
1089 encoded_buffer *buffer = &reader->input->buffer->utf16;
1090 const WCHAR *ptr;
1092 while (*(ptr = reader_get_ptr(reader)) && n--)
1094 reader_update_position(reader, *ptr);
1095 buffer->cur++;
1099 static inline BOOL is_wchar_space(WCHAR ch)
1101 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1104 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1105 static int reader_skipspaces(xmlreader *reader)
1107 const WCHAR *ptr = reader_get_ptr(reader);
1108 UINT start = reader_get_cur(reader);
1110 while (is_wchar_space(*ptr))
1112 reader_skipn(reader, 1);
1113 ptr = reader_get_ptr(reader);
1116 return reader_get_cur(reader) - start;
1119 /* [26] VersionNum ::= '1.' [0-9]+ */
1120 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1122 static const WCHAR onedotW[] = {'1','.',0};
1123 WCHAR *ptr, *ptr2;
1124 UINT start;
1126 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1128 start = reader_get_cur(reader);
1129 /* skip "1." */
1130 reader_skipn(reader, 2);
1132 ptr2 = ptr = reader_get_ptr(reader);
1133 while (*ptr >= '0' && *ptr <= '9')
1135 reader_skipn(reader, 1);
1136 ptr = reader_get_ptr(reader);
1139 if (ptr2 == ptr) return WC_E_DIGIT;
1140 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1141 TRACE("version=%s\n", debug_strval(reader, val));
1142 return S_OK;
1145 /* [25] Eq ::= S? '=' S? */
1146 static HRESULT reader_parse_eq(xmlreader *reader)
1148 static const WCHAR eqW[] = {'=',0};
1149 reader_skipspaces(reader);
1150 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1151 /* skip '=' */
1152 reader_skipn(reader, 1);
1153 reader_skipspaces(reader);
1154 return S_OK;
1157 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1158 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1160 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1161 struct reader_position position;
1162 strval val, name;
1163 HRESULT hr;
1165 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1167 position = reader->position;
1168 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1169 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1170 /* skip 'version' */
1171 reader_skipn(reader, 7);
1173 hr = reader_parse_eq(reader);
1174 if (FAILED(hr)) return hr;
1176 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1177 return WC_E_QUOTE;
1178 /* skip "'"|'"' */
1179 reader_skipn(reader, 1);
1181 hr = reader_parse_versionnum(reader, &val);
1182 if (FAILED(hr)) return hr;
1184 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1185 return WC_E_QUOTE;
1187 /* skip "'"|'"' */
1188 reader_skipn(reader, 1);
1190 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
/* ([A-Za-z0-9._] | '-')
 *
 * Returns TRUE when 'ch' may appear after the first character of an
 * encoding name (see production [81] EncName).
 */
static inline BOOL is_wchar_encname(WCHAR ch)
{
    return ((ch >= 'A' && ch <= 'Z') ||
            (ch >= 'a' && ch <= 'z') ||
            (ch >= '0' && ch <= '9') ||
            (ch == '.') || (ch == '_') ||
            (ch == '-'));
}
1203 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1204 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1206 WCHAR *start = reader_get_ptr(reader), *ptr;
1207 xml_encoding enc;
1208 int len;
1210 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1211 return WC_E_ENCNAME;
1213 val->start = reader_get_cur(reader);
1215 ptr = start;
1216 while (is_wchar_encname(*++ptr))
1219 len = ptr - start;
1220 enc = parse_encoding_name(start, len);
1221 TRACE("encoding name %s\n", debugstr_wn(start, len));
1222 val->str = start;
1223 val->len = len;
1225 if (enc == XmlEncoding_Unknown)
1226 return WC_E_ENCNAME;
1228 /* skip encoding name */
1229 reader_skipn(reader, len);
1230 return S_OK;
1233 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1234 static HRESULT reader_parse_encdecl(xmlreader *reader)
1236 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1237 struct reader_position position;
1238 strval name, val;
1239 HRESULT hr;
1241 if (!reader_skipspaces(reader)) return S_FALSE;
1243 position = reader->position;
1244 if (reader_cmp(reader, encodingW)) return S_FALSE;
1245 name.str = reader_get_ptr(reader);
1246 name.start = reader_get_cur(reader);
1247 name.len = 8;
1248 /* skip 'encoding' */
1249 reader_skipn(reader, 8);
1251 hr = reader_parse_eq(reader);
1252 if (FAILED(hr)) return hr;
1254 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1255 return WC_E_QUOTE;
1256 /* skip "'"|'"' */
1257 reader_skipn(reader, 1);
1259 hr = reader_parse_encname(reader, &val);
1260 if (FAILED(hr)) return hr;
1262 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1263 return WC_E_QUOTE;
1265 /* skip "'"|'"' */
1266 reader_skipn(reader, 1);
1268 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1271 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1272 static HRESULT reader_parse_sddecl(xmlreader *reader)
1274 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1275 static const WCHAR yesW[] = {'y','e','s',0};
1276 static const WCHAR noW[] = {'n','o',0};
1277 struct reader_position position;
1278 strval name, val;
1279 UINT start;
1280 HRESULT hr;
1282 if (!reader_skipspaces(reader)) return S_FALSE;
1284 position = reader->position;
1285 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1286 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1287 /* skip 'standalone' */
1288 reader_skipn(reader, 10);
1290 hr = reader_parse_eq(reader);
1291 if (FAILED(hr)) return hr;
1293 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1294 return WC_E_QUOTE;
1295 /* skip "'"|'"' */
1296 reader_skipn(reader, 1);
1298 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1299 return WC_E_XMLDECL;
1301 start = reader_get_cur(reader);
1302 /* skip 'yes'|'no' */
1303 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1304 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1305 TRACE("standalone=%s\n", debug_strval(reader, &val));
1307 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1308 return WC_E_QUOTE;
1309 /* skip "'"|'"' */
1310 reader_skipn(reader, 1);
1312 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1315 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1316 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1318 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1319 static const WCHAR declcloseW[] = {'?','>',0};
1320 struct reader_position position;
1321 HRESULT hr;
1323 /* check if we have "<?xml " */
1324 if (reader_cmp(reader, xmldeclW))
1325 return S_FALSE;
1327 reader_skipn(reader, 2);
1328 position = reader->position;
1329 reader_skipn(reader, 3);
1330 hr = reader_parse_versioninfo(reader);
1331 if (FAILED(hr))
1332 return hr;
1334 hr = reader_parse_encdecl(reader);
1335 if (FAILED(hr))
1336 return hr;
1338 hr = reader_parse_sddecl(reader);
1339 if (FAILED(hr))
1340 return hr;
1342 reader_skipspaces(reader);
1343 if (reader_cmp(reader, declcloseW))
1344 return WC_E_XMLDECL;
1346 /* skip '?>' */
1347 reader_skipn(reader, 2);
1349 reader->nodetype = XmlNodeType_XmlDeclaration;
1350 reader->empty_element.position = position;
1351 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1352 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1354 return S_OK;
1357 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1358 static HRESULT reader_parse_comment(xmlreader *reader)
1360 WCHAR *ptr;
1361 UINT start;
1363 if (reader->resumestate == XmlReadResumeState_Comment)
1365 start = reader->resume[XmlReadResume_Body];
1366 ptr = reader_get_ptr(reader);
1368 else
1370 /* skip '<!--' */
1371 reader_skipn(reader, 4);
1372 reader_shrink(reader);
1373 ptr = reader_get_ptr(reader);
1374 start = reader_get_cur(reader);
1375 reader->nodetype = XmlNodeType_Comment;
1376 reader->resume[XmlReadResume_Body] = start;
1377 reader->resumestate = XmlReadResumeState_Comment;
1378 reader_set_strvalue(reader, StringValue_Value, NULL);
1381 /* will exit when there's no more data, it won't attempt to
1382 read more from stream */
1383 while (*ptr)
1385 if (ptr[0] == '-')
1387 if (ptr[1] == '-')
1389 if (ptr[2] == '>')
1391 strval value;
1393 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1394 TRACE("%s\n", debug_strval(reader, &value));
1396 /* skip rest of markup '->' */
1397 reader_skipn(reader, 3);
1399 reader_set_strvalue(reader, StringValue_Value, &value);
1400 reader->resume[XmlReadResume_Body] = 0;
1401 reader->resumestate = XmlReadResumeState_Initial;
1402 return S_OK;
1404 else
1405 return WC_E_COMMENT;
1409 reader_skipn(reader, 1);
1410 ptr++;
1413 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 *
 * Returns TRUE when the UTF-16 code unit 'ch' may be part of a Char. Both
 * surrogate halves are accepted individually; pairing is not checked here.
 */
static inline BOOL is_char(WCHAR ch)
{
    return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
           (ch >= 0x20 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xe000 && ch <= 0xfffd);
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns TRUE when 'ch' is allowed inside a public identifier literal.
 *
 * The punctuation range starts at the apostrophe (0x27) so that '()*+, are
 * accepted as well as -./:; . Callers that use an apostrophe as the literal
 * delimiter must test for the delimiter themselves.
 */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; (and the digits in between) */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* Returns TRUE when the UTF-16 code unit 'ch' may start a Name
 * (production [4] NameStartChar). Both surrogate halves are accepted
 * individually to cover [#x10000-#xEFFFF]; pairing is not checked here.
 */
static inline BOOL is_namestartchar(WCHAR ch)
{
    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*)
 *
 * Returns TRUE when the UTF-16 code unit 'ch' may appear in an NCName, i.e.
 * it is a name character other than ':'. Both surrogate halves are accepted
 * individually; pairing is not checked here.
 */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0'    && ch <= '9')    ||
           (ch == 0xb7)                   ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x300  && ch <= 0x36f)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1484 static inline BOOL is_namechar(WCHAR ch)
1486 return (ch == ':') || is_ncnamechar(ch);
1489 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1491 /* When we're on attribute always return attribute type, container node type is kept.
1492 Note that container is not necessarily an element, and attribute doesn't mean it's
1493 an attribute in XML spec terms. */
1494 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1497 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1498 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1499 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1500 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1501 [5] Name ::= NameStartChar (NameChar)* */
1502 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1504 WCHAR *ptr;
1505 UINT start;
1507 if (reader->resume[XmlReadResume_Name])
1509 start = reader->resume[XmlReadResume_Name];
1510 ptr = reader_get_ptr(reader);
1512 else
1514 ptr = reader_get_ptr(reader);
1515 start = reader_get_cur(reader);
1516 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1519 while (is_namechar(*ptr))
1521 reader_skipn(reader, 1);
1522 ptr = reader_get_ptr(reader);
1525 if (is_reader_pending(reader))
1527 reader->resume[XmlReadResume_Name] = start;
1528 return E_PENDING;
1530 else
1531 reader->resume[XmlReadResume_Name] = 0;
1533 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1534 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1536 return S_OK;
1539 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1540 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1542 static const WCHAR xmlW[] = {'x','m','l'};
1543 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1544 strval name;
1545 WCHAR *ptr;
1546 HRESULT hr;
1547 UINT i;
1549 hr = reader_parse_name(reader, &name);
1550 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1552 /* now that we got name check for illegal content */
1553 if (strval_eq(reader, &name, &xmlval))
1554 return WC_E_LEADINGXML;
1556 /* PITarget can't be a qualified name */
1557 ptr = reader_get_strptr(reader, &name);
1558 for (i = 0; i < name.len; i++)
1559 if (ptr[i] == ':')
1560 return i ? NC_E_NAMECOLON : WC_E_PI;
1562 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1563 *target = name;
1564 return S_OK;
1567 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1568 static HRESULT reader_parse_pi(xmlreader *reader)
1570 strval target;
1571 WCHAR *ptr;
1572 UINT start;
1573 HRESULT hr;
1575 switch (reader->resumestate)
1577 case XmlReadResumeState_Initial:
1578 /* skip '<?' */
1579 reader_skipn(reader, 2);
1580 reader_shrink(reader);
1581 reader->resumestate = XmlReadResumeState_PITarget;
1582 case XmlReadResumeState_PITarget:
1583 hr = reader_parse_pitarget(reader, &target);
1584 if (FAILED(hr)) return hr;
1585 reader_set_strvalue(reader, StringValue_LocalName, &target);
1586 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1587 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1588 reader->resumestate = XmlReadResumeState_PIBody;
1589 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1590 default:
1594 start = reader->resume[XmlReadResume_Body];
1595 ptr = reader_get_ptr(reader);
1596 while (*ptr)
1598 if (ptr[0] == '?')
1600 if (ptr[1] == '>')
1602 UINT cur = reader_get_cur(reader);
1603 strval value;
1605 /* strip all leading whitespace chars */
1606 while (start < cur)
1608 ptr = reader_get_ptr2(reader, start);
1609 if (!is_wchar_space(*ptr)) break;
1610 start++;
1613 reader_init_strvalue(start, cur-start, &value);
1615 /* skip '?>' */
1616 reader_skipn(reader, 2);
1617 TRACE("%s\n", debug_strval(reader, &value));
1618 reader->nodetype = XmlNodeType_ProcessingInstruction;
1619 reader->resumestate = XmlReadResumeState_Initial;
1620 reader->resume[XmlReadResume_Body] = 0;
1621 reader_set_strvalue(reader, StringValue_Value, &value);
1622 return S_OK;
1626 reader_skipn(reader, 1);
1627 ptr = reader_get_ptr(reader);
1630 return S_OK;
1633 /* This one is used to parse significant whitespace nodes, like in Misc production */
1634 static HRESULT reader_parse_whitespace(xmlreader *reader)
1636 switch (reader->resumestate)
1638 case XmlReadResumeState_Initial:
1639 reader_shrink(reader);
1640 reader->resumestate = XmlReadResumeState_Whitespace;
1641 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1642 reader->nodetype = XmlNodeType_Whitespace;
1643 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1644 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1645 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1646 /* fallthrough */
1647 case XmlReadResumeState_Whitespace:
1649 strval value;
1650 UINT start;
1652 reader_skipspaces(reader);
1653 if (is_reader_pending(reader)) return S_OK;
1655 start = reader->resume[XmlReadResume_Body];
1656 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1657 reader_set_strvalue(reader, StringValue_Value, &value);
1658 TRACE("%s\n", debug_strval(reader, &value));
1659 reader->resumestate = XmlReadResumeState_Initial;
1661 default:
1665 return S_OK;
1668 /* [27] Misc ::= Comment | PI | S */
1669 static HRESULT reader_parse_misc(xmlreader *reader)
1671 HRESULT hr = S_FALSE;
1673 if (reader->resumestate != XmlReadResumeState_Initial)
1675 hr = reader_more(reader);
1676 if (FAILED(hr)) return hr;
1678 /* finish current node */
1679 switch (reader->resumestate)
1681 case XmlReadResumeState_PITarget:
1682 case XmlReadResumeState_PIBody:
1683 return reader_parse_pi(reader);
1684 case XmlReadResumeState_Comment:
1685 return reader_parse_comment(reader);
1686 case XmlReadResumeState_Whitespace:
1687 return reader_parse_whitespace(reader);
1688 default:
1689 ERR("unknown resume state %d\n", reader->resumestate);
1693 while (1)
1695 const WCHAR *cur = reader_get_ptr(reader);
1697 if (is_wchar_space(*cur))
1698 hr = reader_parse_whitespace(reader);
1699 else if (!reader_cmp(reader, commentW))
1700 hr = reader_parse_comment(reader);
1701 else if (!reader_cmp(reader, piW))
1702 hr = reader_parse_pi(reader);
1703 else
1704 break;
1706 if (hr != S_FALSE) return hr;
1709 return hr;
1712 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1713 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1715 WCHAR *cur = reader_get_ptr(reader), quote;
1716 UINT start;
1718 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1720 quote = *cur;
1721 reader_skipn(reader, 1);
1723 cur = reader_get_ptr(reader);
1724 start = reader_get_cur(reader);
1725 while (is_char(*cur) && *cur != quote)
1727 reader_skipn(reader, 1);
1728 cur = reader_get_ptr(reader);
1730 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1731 if (*cur == quote) reader_skipn(reader, 1);
1733 TRACE("%s\n", debug_strval(reader, literal));
1734 return S_OK;
1737 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1738 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1739 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1741 WCHAR *cur = reader_get_ptr(reader), quote;
1742 UINT start;
1744 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1746 quote = *cur;
1747 reader_skipn(reader, 1);
1749 start = reader_get_cur(reader);
1750 cur = reader_get_ptr(reader);
1751 while (is_pubchar(*cur) && *cur != quote)
1753 reader_skipn(reader, 1);
1754 cur = reader_get_ptr(reader);
1756 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1757 if (*cur == quote) reader_skipn(reader, 1);
1759 TRACE("%s\n", debug_strval(reader, literal));
1760 return S_OK;
1763 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1764 static HRESULT reader_parse_externalid(xmlreader *reader)
1766 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1767 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1768 struct reader_position position = reader->position;
1769 strval name, sys;
1770 HRESULT hr;
1771 int cnt;
1773 if (!reader_cmp(reader, publicW)) {
1774 strval pub;
1776 /* public id */
1777 reader_skipn(reader, 6);
1778 cnt = reader_skipspaces(reader);
1779 if (!cnt) return WC_E_WHITESPACE;
1781 hr = reader_parse_pub_literal(reader, &pub);
1782 if (FAILED(hr)) return hr;
1784 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1785 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1786 if (FAILED(hr)) return hr;
1788 cnt = reader_skipspaces(reader);
1789 if (!cnt) return S_OK;
1791 /* optional system id */
1792 hr = reader_parse_sys_literal(reader, &sys);
1793 if (FAILED(hr)) return S_OK;
1795 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1796 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1797 if (FAILED(hr)) return hr;
1799 return S_OK;
1800 } else if (!reader_cmp(reader, systemW)) {
1801 /* system id */
1802 reader_skipn(reader, 6);
1803 cnt = reader_skipspaces(reader);
1804 if (!cnt) return WC_E_WHITESPACE;
1806 hr = reader_parse_sys_literal(reader, &sys);
1807 if (FAILED(hr)) return hr;
1809 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1810 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1813 return S_FALSE;
1816 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1817 static HRESULT reader_parse_dtd(xmlreader *reader)
1819 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1820 strval name;
1821 WCHAR *cur;
1822 HRESULT hr;
1824 /* check if we have "<!DOCTYPE" */
1825 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1826 reader_shrink(reader);
1828 /* DTD processing is not allowed by default */
1829 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1831 reader_skipn(reader, 9);
1832 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1834 /* name */
1835 hr = reader_parse_name(reader, &name);
1836 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1838 reader_skipspaces(reader);
1840 hr = reader_parse_externalid(reader);
1841 if (FAILED(hr)) return hr;
1843 reader_skipspaces(reader);
1845 cur = reader_get_ptr(reader);
1846 if (*cur != '>')
1848 FIXME("internal subset parsing not implemented\n");
1849 return E_NOTIMPL;
1852 /* skip '>' */
1853 reader_skipn(reader, 1);
1855 reader->nodetype = XmlNodeType_DocumentType;
1856 reader_set_strvalue(reader, StringValue_LocalName, &name);
1857 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1859 return S_OK;
1862 /* [11 NS] LocalPart ::= NCName */
1863 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1865 WCHAR *ptr;
1866 UINT start;
1868 if (reader->resume[XmlReadResume_Local])
1870 start = reader->resume[XmlReadResume_Local];
1871 ptr = reader_get_ptr(reader);
1873 else
1875 ptr = reader_get_ptr(reader);
1876 start = reader_get_cur(reader);
1879 while (is_ncnamechar(*ptr))
1881 reader_skipn(reader, 1);
1882 ptr = reader_get_ptr(reader);
1885 if (check_for_separator && *ptr == ':')
1886 return NC_E_QNAMECOLON;
1888 if (is_reader_pending(reader))
1890 reader->resume[XmlReadResume_Local] = start;
1891 return E_PENDING;
1893 else
1894 reader->resume[XmlReadResume_Local] = 0;
1896 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1898 return S_OK;
1901 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1902 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1903 [9 NS] UnprefixedName ::= LocalPart
1904 [10 NS] Prefix ::= NCName */
1905 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1907 WCHAR *ptr;
1908 UINT start;
1909 HRESULT hr;
1911 if (reader->resume[XmlReadResume_Name])
1913 start = reader->resume[XmlReadResume_Name];
1914 ptr = reader_get_ptr(reader);
1916 else
1918 ptr = reader_get_ptr(reader);
1919 start = reader_get_cur(reader);
1920 reader->resume[XmlReadResume_Name] = start;
1921 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1924 if (reader->resume[XmlReadResume_Local])
1926 hr = reader_parse_local(reader, local, FALSE);
1927 if (FAILED(hr)) return hr;
1929 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1930 local->start - reader->resume[XmlReadResume_Name] - 1,
1931 prefix);
1933 else
1935 /* skip prefix part */
1936 while (is_ncnamechar(*ptr))
1938 reader_skipn(reader, 1);
1939 ptr = reader_get_ptr(reader);
1942 if (is_reader_pending(reader)) return E_PENDING;
1944 /* got a qualified name */
1945 if (*ptr == ':')
1947 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1949 /* skip ':' */
1950 reader_skipn(reader, 1);
1951 hr = reader_parse_local(reader, local, TRUE);
1952 if (FAILED(hr)) return hr;
1954 else
1956 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1957 reader_init_strvalue(0, 0, prefix);
1961 if (prefix->len)
1962 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1963 else
1964 TRACE("ncname %s\n", debug_strval(reader, local));
1966 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1967 /* count ':' too */
1968 (prefix->len ? prefix->len + 1 : 0) + local->len,
1969 qname);
1971 reader->resume[XmlReadResume_Name] = 0;
1972 reader->resume[XmlReadResume_Local] = 0;
1974 return S_OK;
1977 /* Applies normalization rules to a single char, used for attribute values.
1979 Rules include 2 steps:
1981 1) replacing \r\n with a single \n;
1982 2) replacing all whitespace chars with ' '.
1985 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1987 encoded_buffer *buffer = &reader->input->buffer->utf16;
1989 if (!is_wchar_space(*ptr)) return;
1991 if (*ptr == '\r' && *(ptr+1) == '\n')
1993 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1994 memmove(ptr+1, ptr+2, len);
1996 *ptr = ' ';
1999 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2001 static const WCHAR entltW[] = {'l','t'};
2002 static const WCHAR entgtW[] = {'g','t'};
2003 static const WCHAR entampW[] = {'a','m','p'};
2004 static const WCHAR entaposW[] = {'a','p','o','s'};
2005 static const WCHAR entquotW[] = {'q','u','o','t'};
2006 static const strval lt = { (WCHAR*)entltW, 2 };
2007 static const strval gt = { (WCHAR*)entgtW, 2 };
2008 static const strval amp = { (WCHAR*)entampW, 3 };
2009 static const strval apos = { (WCHAR*)entaposW, 4 };
2010 static const strval quot = { (WCHAR*)entquotW, 4 };
2011 WCHAR *str = reader_get_strptr(reader, name);
2013 switch (*str)
2015 case 'l':
2016 if (strval_eq(reader, name, &lt)) return '<';
2017 break;
2018 case 'g':
2019 if (strval_eq(reader, name, &gt)) return '>';
2020 break;
2021 case 'a':
2022 if (strval_eq(reader, name, &amp))
2023 return '&';
2024 else if (strval_eq(reader, name, &apos))
2025 return '\'';
2026 break;
2027 case 'q':
2028 if (strval_eq(reader, name, &quot)) return '\"';
2029 break;
2030 default:
2034 return 0;
2037 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2038 [67] Reference ::= EntityRef | CharRef
2039 [68] EntityRef ::= '&' Name ';' */
2040 static HRESULT reader_parse_reference(xmlreader *reader)
2042 encoded_buffer *buffer = &reader->input->buffer->utf16;
2043 WCHAR *start = reader_get_ptr(reader), *ptr;
2044 UINT cur = reader_get_cur(reader);
2045 WCHAR ch = 0;
2046 int len;
2048 /* skip '&' */
2049 reader_skipn(reader, 1);
2050 ptr = reader_get_ptr(reader);
2052 if (*ptr == '#')
2054 reader_skipn(reader, 1);
2055 ptr = reader_get_ptr(reader);
2057 /* hex char or decimal */
2058 if (*ptr == 'x')
2060 reader_skipn(reader, 1);
2061 ptr = reader_get_ptr(reader);
2063 while (*ptr != ';')
2065 if ((*ptr >= '0' && *ptr <= '9'))
2066 ch = ch*16 + *ptr - '0';
2067 else if ((*ptr >= 'a' && *ptr <= 'f'))
2068 ch = ch*16 + *ptr - 'a' + 10;
2069 else if ((*ptr >= 'A' && *ptr <= 'F'))
2070 ch = ch*16 + *ptr - 'A' + 10;
2071 else
2072 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2073 reader_skipn(reader, 1);
2074 ptr = reader_get_ptr(reader);
2077 else
2079 while (*ptr != ';')
2081 if ((*ptr >= '0' && *ptr <= '9'))
2083 ch = ch*10 + *ptr - '0';
2084 reader_skipn(reader, 1);
2085 ptr = reader_get_ptr(reader);
2087 else
2088 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2092 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2094 /* normalize */
2095 if (is_wchar_space(ch)) ch = ' ';
2097 ptr = reader_get_ptr(reader);
2098 start = reader_get_ptr2(reader, cur);
2099 len = buffer->written - ((char *)ptr - buffer->data);
2100 memmove(start + 1, ptr + 1, len);
2102 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2103 buffer->cur = cur + 1;
2105 *start = ch;
2107 else
2109 strval name;
2110 HRESULT hr;
2112 hr = reader_parse_name(reader, &name);
2113 if (FAILED(hr)) return hr;
2115 ptr = reader_get_ptr(reader);
2116 if (*ptr != ';') return WC_E_SEMICOLON;
2118 /* predefined entities resolve to a single character */
2119 ch = get_predefined_entity(reader, &name);
2120 if (ch)
2122 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2123 memmove(start+1, ptr+1, len);
2124 buffer->cur = cur + 1;
2126 *start = ch;
2128 else
2130 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2131 return WC_E_UNDECLAREDENTITY;
2136 return S_OK;
2139 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2140 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2142 WCHAR *ptr, quote;
2143 UINT start;
2145 ptr = reader_get_ptr(reader);
2147 /* skip opening quote */
2148 quote = *ptr;
2149 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2150 reader_skipn(reader, 1);
2152 ptr = reader_get_ptr(reader);
2153 start = reader_get_cur(reader);
2154 while (*ptr)
2156 if (*ptr == '<') return WC_E_LESSTHAN;
2158 if (*ptr == quote)
2160 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2161 /* skip closing quote */
2162 reader_skipn(reader, 1);
2163 return S_OK;
2166 if (*ptr == '&')
2168 HRESULT hr = reader_parse_reference(reader);
2169 if (FAILED(hr)) return hr;
2171 else
2173 reader_normalize_space(reader, ptr);
2174 reader_skipn(reader, 1);
2176 ptr = reader_get_ptr(reader);
2179 return WC_E_QUOTE;
2182 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2183 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2184 [3 NS] DefaultAttName ::= 'xmlns'
2185 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2186 static HRESULT reader_parse_attribute(xmlreader *reader)
2188 struct reader_position position = reader->position;
2189 strval prefix, local, qname, value;
2190 enum attribute_flags flags = 0;
2191 HRESULT hr;
2193 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2194 if (FAILED(hr)) return hr;
2196 if (strval_eq(reader, &prefix, &strval_xmlns))
2197 flags |= ATTRIBUTE_NS_DEFINITION;
2199 if (strval_eq(reader, &qname, &strval_xmlns))
2200 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2202 hr = reader_parse_eq(reader);
2203 if (FAILED(hr)) return hr;
2205 hr = reader_parse_attvalue(reader, &value);
2206 if (FAILED(hr)) return hr;
2208 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2209 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2211 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2212 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2215 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2216 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2217 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2219 struct reader_position position = reader->position;
2220 HRESULT hr;
2222 hr = reader_parse_qname(reader, prefix, local, qname);
2223 if (FAILED(hr)) return hr;
2225 for (;;)
2227 static const WCHAR endW[] = {'/','>',0};
2229 reader_skipspaces(reader);
2231 /* empty element */
2232 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2234 struct element *element = &reader->empty_element;
2236 /* skip '/>' */
2237 reader_skipn(reader, 2);
2239 reader_free_strvalued(reader, &element->qname);
2240 reader_free_strvalued(reader, &element->localname);
2242 element->prefix = *prefix;
2243 reader_strvaldup(reader, qname, &element->qname);
2244 reader_strvaldup(reader, local, &element->localname);
2245 element->position = position;
2246 reader_mark_ns_nodes(reader, element);
2247 return S_OK;
2250 /* got a start tag */
2251 if (!reader_cmp(reader, gtW))
2253 /* skip '>' */
2254 reader_skipn(reader, 1);
2255 return reader_push_element(reader, prefix, local, qname, &position);
2258 hr = reader_parse_attribute(reader);
2259 if (FAILED(hr)) return hr;
2262 return S_OK;
2265 /* [39] element ::= EmptyElemTag | STag content ETag */
2266 static HRESULT reader_parse_element(xmlreader *reader)
2268 HRESULT hr;
2270 switch (reader->resumestate)
2272 case XmlReadResumeState_Initial:
2273 /* check if we are really on element */
2274 if (reader_cmp(reader, ltW)) return S_FALSE;
2276 /* skip '<' */
2277 reader_skipn(reader, 1);
2279 reader_shrink(reader);
2280 reader->resumestate = XmlReadResumeState_STag;
2281 case XmlReadResumeState_STag:
2283 strval qname, prefix, local;
2285 /* this handles empty elements too */
2286 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2287 if (FAILED(hr)) return hr;
2289 /* FIXME: need to check for defined namespace to reject invalid prefix */
2291 /* if we got empty element and stack is empty go straight to Misc */
2292 if (reader->is_empty_element && list_empty(&reader->elements))
2293 reader->instate = XmlReadInState_MiscEnd;
2294 else
2295 reader->instate = XmlReadInState_Content;
2297 reader->nodetype = XmlNodeType_Element;
2298 reader->resumestate = XmlReadResumeState_Initial;
2299 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2300 reader_set_strvalue(reader, StringValue_LocalName, &local);
2301 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2302 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2303 break;
2305 default:
2306 hr = E_FAIL;
2309 return hr;
2312 /* [13 NS] ETag ::= '</' QName S? '>' */
2313 static HRESULT reader_parse_endtag(xmlreader *reader)
2315 struct reader_position position;
2316 strval prefix, local, qname;
2317 struct element *element;
2318 HRESULT hr;
2320 /* skip '</' */
2321 reader_skipn(reader, 2);
2323 position = reader->position;
2324 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2325 if (FAILED(hr)) return hr;
2327 reader_skipspaces(reader);
2329 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2331 /* skip '>' */
2332 reader_skipn(reader, 1);
2334 /* Element stack should never be empty at this point, cause we shouldn't get to
2335 content parsing if it's empty. */
2336 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2337 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2339 /* update position stored for start tag, we won't be using it */
2340 element->position = position;
2342 reader->nodetype = XmlNodeType_EndElement;
2343 reader->is_empty_element = FALSE;
2344 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2346 return S_OK;
2349 /* [18] CDSect ::= CDStart CData CDEnd
2350 [19] CDStart ::= '<![CDATA['
2351 [20] CData ::= (Char* - (Char* ']]>' Char*))
2352 [21] CDEnd ::= ']]>' */
2353 static HRESULT reader_parse_cdata(xmlreader *reader)
2355 WCHAR *ptr;
2356 UINT start;
2358 if (reader->resumestate == XmlReadResumeState_CDATA)
2360 start = reader->resume[XmlReadResume_Body];
2361 ptr = reader_get_ptr(reader);
2363 else
2365 /* skip markup '<![CDATA[' */
2366 reader_skipn(reader, 9);
2367 reader_shrink(reader);
2368 ptr = reader_get_ptr(reader);
2369 start = reader_get_cur(reader);
2370 reader->nodetype = XmlNodeType_CDATA;
2371 reader->resume[XmlReadResume_Body] = start;
2372 reader->resumestate = XmlReadResumeState_CDATA;
2373 reader_set_strvalue(reader, StringValue_Value, NULL);
2376 while (*ptr)
2378 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2380 strval value;
2382 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2384 /* skip ']]>' */
2385 reader_skipn(reader, 3);
2386 TRACE("%s\n", debug_strval(reader, &value));
2388 reader_set_strvalue(reader, StringValue_Value, &value);
2389 reader->resume[XmlReadResume_Body] = 0;
2390 reader->resumestate = XmlReadResumeState_Initial;
2391 return S_OK;
2393 else
2395 /* Value normalization is not fully implemented, rules are:
2397 - single '\r' -> '\n';
2398 - sequence '\r\n' -> '\n', in this case value length changes;
2400 if (*ptr == '\r') *ptr = '\n';
2401 reader_skipn(reader, 1);
2402 ptr++;
2406 return S_OK;
2409 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2410 static HRESULT reader_parse_chardata(xmlreader *reader)
2412 struct reader_position position;
2413 WCHAR *ptr;
2414 UINT start;
2416 if (reader->resumestate == XmlReadResumeState_CharData)
2418 start = reader->resume[XmlReadResume_Body];
2419 ptr = reader_get_ptr(reader);
2421 else
2423 reader_shrink(reader);
2424 ptr = reader_get_ptr(reader);
2425 start = reader_get_cur(reader);
2426 /* There's no text */
2427 if (!*ptr || *ptr == '<') return S_OK;
2428 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2429 reader->resume[XmlReadResume_Body] = start;
2430 reader->resumestate = XmlReadResumeState_CharData;
2431 reader_set_strvalue(reader, StringValue_Value, NULL);
2434 position = reader->position;
2435 while (*ptr)
2437 static const WCHAR ampW[] = {'&',0};
2439 /* CDATA closing sequence ']]>' is not allowed */
2440 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2441 return WC_E_CDSECTEND;
2443 /* Found next markup part */
2444 if (ptr[0] == '<')
2446 strval value;
2448 reader->empty_element.position = position;
2449 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2450 reader_set_strvalue(reader, StringValue_Value, &value);
2451 reader->resume[XmlReadResume_Body] = 0;
2452 reader->resumestate = XmlReadResumeState_Initial;
2453 return S_OK;
2456 /* this covers a case when text has leading whitespace chars */
2457 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2459 if (!reader_cmp(reader, ampW))
2460 reader_parse_reference(reader);
2461 else
2462 reader_skipn(reader, 1);
2464 ptr = reader_get_ptr(reader);
2467 return S_OK;
2470 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2471 static HRESULT reader_parse_content(xmlreader *reader)
2473 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2474 static const WCHAR etagW[] = {'<','/',0};
2476 if (reader->resumestate != XmlReadResumeState_Initial)
2478 switch (reader->resumestate)
2480 case XmlReadResumeState_CDATA:
2481 return reader_parse_cdata(reader);
2482 case XmlReadResumeState_Comment:
2483 return reader_parse_comment(reader);
2484 case XmlReadResumeState_PIBody:
2485 case XmlReadResumeState_PITarget:
2486 return reader_parse_pi(reader);
2487 case XmlReadResumeState_CharData:
2488 return reader_parse_chardata(reader);
2489 default:
2490 ERR("unknown resume state %d\n", reader->resumestate);
2494 reader_shrink(reader);
2496 /* handle end tag here, it indicates end of content as well */
2497 if (!reader_cmp(reader, etagW))
2498 return reader_parse_endtag(reader);
2500 if (!reader_cmp(reader, commentW))
2501 return reader_parse_comment(reader);
2503 if (!reader_cmp(reader, piW))
2504 return reader_parse_pi(reader);
2506 if (!reader_cmp(reader, cdstartW))
2507 return reader_parse_cdata(reader);
2509 if (!reader_cmp(reader, ltW))
2510 return reader_parse_element(reader);
2512 /* what's left must be CharData */
2513 return reader_parse_chardata(reader);
2516 static HRESULT reader_parse_nextnode(xmlreader *reader)
2518 XmlNodeType nodetype = reader_get_nodetype(reader);
2519 HRESULT hr;
2521 if (!is_reader_pending(reader))
2522 reader_clear_attrs(reader);
2524 /* When moving from EndElement or empty element, pop its own namespace definitions */
2525 switch (nodetype)
2527 case XmlNodeType_Attribute:
2528 reader_dec_depth(reader);
2529 /* fallthrough */
2530 case XmlNodeType_Element:
2531 if (reader->is_empty_element)
2532 reader_pop_ns_nodes(reader, &reader->empty_element);
2533 else if (FAILED(hr = reader_inc_depth(reader)))
2534 return hr;
2535 break;
2536 case XmlNodeType_EndElement:
2537 reader_pop_element(reader);
2538 reader_dec_depth(reader);
2539 break;
2540 default:
2544 for (;;)
2546 switch (reader->instate)
2548 /* if it's a first call for a new input we need to detect stream encoding */
2549 case XmlReadInState_Initial:
2551 xml_encoding enc;
2553 hr = readerinput_growraw(reader->input);
2554 if (FAILED(hr)) return hr;
2556 reader->position.line_number = 1;
2557 reader->position.line_position = 1;
2559 /* try to detect encoding by BOM or data and set input code page */
2560 hr = readerinput_detectencoding(reader->input, &enc);
2561 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2562 debugstr_w(xml_encoding_map[enc].name), hr);
2563 if (FAILED(hr)) return hr;
2565 /* always switch first time cause we have to put something in */
2566 readerinput_switchencoding(reader->input, enc);
2568 /* parse xml declaration */
2569 hr = reader_parse_xmldecl(reader);
2570 if (FAILED(hr)) return hr;
2572 readerinput_shrinkraw(reader->input, -1);
2573 reader->instate = XmlReadInState_Misc_DTD;
2574 if (hr == S_OK) return hr;
2576 break;
2577 case XmlReadInState_Misc_DTD:
2578 hr = reader_parse_misc(reader);
2579 if (FAILED(hr)) return hr;
2581 if (hr == S_FALSE)
2582 reader->instate = XmlReadInState_DTD;
2583 else
2584 return hr;
2585 break;
2586 case XmlReadInState_DTD:
2587 hr = reader_parse_dtd(reader);
2588 if (FAILED(hr)) return hr;
2590 if (hr == S_OK)
2592 reader->instate = XmlReadInState_DTD_Misc;
2593 return hr;
2595 else
2596 reader->instate = XmlReadInState_Element;
2597 break;
2598 case XmlReadInState_DTD_Misc:
2599 hr = reader_parse_misc(reader);
2600 if (FAILED(hr)) return hr;
2602 if (hr == S_FALSE)
2603 reader->instate = XmlReadInState_Element;
2604 else
2605 return hr;
2606 break;
2607 case XmlReadInState_Element:
2608 return reader_parse_element(reader);
2609 case XmlReadInState_Content:
2610 return reader_parse_content(reader);
2611 case XmlReadInState_MiscEnd:
2612 hr = reader_parse_misc(reader);
2613 if (FAILED(hr)) return hr;
2615 if (hr == S_FALSE)
2617 reader->instate = XmlReadInState_Eof;
2618 reader->state = XmlReadState_EndOfFile;
2619 reader->nodetype = XmlNodeType_None;
2621 return hr;
2622 case XmlReadInState_Eof:
2623 return S_FALSE;
2624 default:
2625 FIXME("internal state %d not handled\n", reader->instate);
2626 return E_NOTIMPL;
2630 return E_NOTIMPL;
2633 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2635 xmlreader *This = impl_from_IXmlReader(iface);
2637 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2639 if (IsEqualGUID(riid, &IID_IUnknown) ||
2640 IsEqualGUID(riid, &IID_IXmlReader))
2642 *ppvObject = iface;
2644 else
2646 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2647 *ppvObject = NULL;
2648 return E_NOINTERFACE;
2651 IXmlReader_AddRef(iface);
2653 return S_OK;
2656 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2658 xmlreader *This = impl_from_IXmlReader(iface);
2659 ULONG ref = InterlockedIncrement(&This->ref);
2660 TRACE("(%p)->(%d)\n", This, ref);
2661 return ref;
2664 static void reader_clear_ns(xmlreader *reader)
2666 struct ns *ns, *ns2;
2668 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2669 reader_free_strvalued(reader, &ns->prefix);
2670 reader_free_strvalued(reader, &ns->uri);
2671 reader_free(reader, ns);
2674 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2675 reader_free_strvalued(reader, &ns->uri);
2676 reader_free(reader, ns);
2680 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2682 xmlreader *This = impl_from_IXmlReader(iface);
2683 LONG ref = InterlockedDecrement(&This->ref);
2685 TRACE("(%p)->(%d)\n", This, ref);
2687 if (ref == 0)
2689 IMalloc *imalloc = This->imalloc;
2690 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2691 if (This->resolver) IXmlResolver_Release(This->resolver);
2692 if (This->mlang) IUnknown_Release(This->mlang);
2693 reader_clear_attrs(This);
2694 reader_clear_ns(This);
2695 reader_clear_elements(This);
2696 reader_free_strvalues(This);
2697 reader_free(This, This);
2698 if (imalloc) IMalloc_Release(imalloc);
2701 return ref;
2704 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2706 xmlreader *This = impl_from_IXmlReader(iface);
2707 IXmlReaderInput *readerinput;
2708 HRESULT hr;
2710 TRACE("(%p)->(%p)\n", This, input);
2712 if (This->input)
2714 readerinput_release_stream(This->input);
2715 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2716 This->input = NULL;
2719 This->position.line_number = 0;
2720 This->position.line_position = 0;
2721 reader_clear_elements(This);
2722 This->depth = 0;
2723 This->nodetype = XmlNodeType_None;
2724 This->resumestate = XmlReadResumeState_Initial;
2725 memset(This->resume, 0, sizeof(This->resume));
2727 /* just reset current input */
2728 if (!input)
2730 This->state = XmlReadState_Initial;
2731 return S_OK;
2734 /* now try IXmlReaderInput, ISequentialStream, IStream */
2735 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2736 if (hr == S_OK)
2738 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2739 This->input = impl_from_IXmlReaderInput(readerinput);
2740 else
2742 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2743 readerinput, readerinput->lpVtbl);
2744 IUnknown_Release(readerinput);
2745 return E_FAIL;
2750 if (hr != S_OK || !readerinput)
2752 /* create IXmlReaderInput basing on supplied interface */
2753 hr = CreateXmlReaderInputWithEncodingName(input,
2754 This->imalloc, NULL, FALSE, NULL, &readerinput);
2755 if (hr != S_OK) return hr;
2756 This->input = impl_from_IXmlReaderInput(readerinput);
2759 /* set stream for supplied IXmlReaderInput */
2760 hr = readerinput_query_for_stream(This->input);
2761 if (hr == S_OK)
2763 This->state = XmlReadState_Initial;
2764 This->instate = XmlReadInState_Initial;
2767 return hr;
2770 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2772 xmlreader *This = impl_from_IXmlReader(iface);
2774 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2776 if (!value) return E_INVALIDARG;
2778 switch (property)
2780 case XmlReaderProperty_MultiLanguage:
2781 *value = (LONG_PTR)This->mlang;
2782 if (This->mlang)
2783 IUnknown_AddRef(This->mlang);
2784 break;
2785 case XmlReaderProperty_XmlResolver:
2786 *value = (LONG_PTR)This->resolver;
2787 if (This->resolver)
2788 IXmlResolver_AddRef(This->resolver);
2789 break;
2790 case XmlReaderProperty_DtdProcessing:
2791 *value = This->dtdmode;
2792 break;
2793 case XmlReaderProperty_ReadState:
2794 *value = This->state;
2795 break;
2796 case XmlReaderProperty_MaxElementDepth:
2797 *value = This->max_depth;
2798 break;
2799 default:
2800 FIXME("Unimplemented property (%u)\n", property);
2801 return E_NOTIMPL;
2804 return S_OK;
2807 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2809 xmlreader *This = impl_from_IXmlReader(iface);
2811 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2813 switch (property)
2815 case XmlReaderProperty_MultiLanguage:
2816 if (This->mlang)
2817 IUnknown_Release(This->mlang);
2818 This->mlang = (IUnknown*)value;
2819 if (This->mlang)
2820 IUnknown_AddRef(This->mlang);
2821 if (This->mlang)
2822 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2823 break;
2824 case XmlReaderProperty_XmlResolver:
2825 if (This->resolver)
2826 IXmlResolver_Release(This->resolver);
2827 This->resolver = (IXmlResolver*)value;
2828 if (This->resolver)
2829 IXmlResolver_AddRef(This->resolver);
2830 break;
2831 case XmlReaderProperty_DtdProcessing:
2832 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2833 This->dtdmode = value;
2834 break;
2835 case XmlReaderProperty_MaxElementDepth:
2836 This->max_depth = value;
2837 break;
2838 default:
2839 FIXME("Unimplemented property (%u)\n", property);
2840 return E_NOTIMPL;
2843 return S_OK;
2846 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2848 xmlreader *This = impl_from_IXmlReader(iface);
2849 XmlNodeType oldtype = This->nodetype;
2850 XmlNodeType type;
2851 HRESULT hr;
2853 TRACE("(%p)->(%p)\n", This, nodetype);
2855 if (!nodetype)
2856 nodetype = &type;
2858 switch (This->state)
2860 case XmlReadState_Closed:
2861 hr = S_FALSE;
2862 break;
2863 case XmlReadState_Error:
2864 hr = This->error;
2865 break;
2866 default:
2867 hr = reader_parse_nextnode(This);
2868 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2869 This->state = XmlReadState_Interactive;
2871 if (FAILED(hr))
2873 This->state = XmlReadState_Error;
2874 This->nodetype = XmlNodeType_None;
2875 This->depth = 0;
2876 This->error = hr;
2880 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2881 *nodetype = This->nodetype;
2883 return hr;
2886 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2888 xmlreader *This = impl_from_IXmlReader(iface);
2890 TRACE("(%p)->(%p)\n", This, node_type);
2892 if (!node_type)
2893 return E_INVALIDARG;
2895 *node_type = reader_get_nodetype(This);
2896 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2899 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2901 if (!reader->attr_count)
2902 return S_FALSE;
2904 if (!reader->attr)
2905 reader_inc_depth(reader);
2907 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2908 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2909 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2910 reader_set_strvalue(reader, StringValue_QualifiedName, &reader->attr->qname);
2911 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2913 return S_OK;
2916 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2918 xmlreader *This = impl_from_IXmlReader(iface);
2920 TRACE("(%p)\n", This);
2922 return reader_move_to_first_attribute(This);
2925 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2927 xmlreader *This = impl_from_IXmlReader(iface);
2928 const struct list *next;
2930 TRACE("(%p)\n", This);
2932 if (!This->attr_count) return S_FALSE;
2934 if (!This->attr)
2935 return reader_move_to_first_attribute(This);
2937 next = list_next(&This->attrs, &This->attr->entry);
2938 if (next)
2940 This->attr = LIST_ENTRY(next, struct attribute, entry);
2941 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2942 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2943 reader_set_strvalue(This, StringValue_QualifiedName, &This->attr->qname);
2944 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2947 return next ? S_OK : S_FALSE;
2950 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2951 LPCWSTR local_name,
2952 LPCWSTR namespaceUri)
2954 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2955 return E_NOTIMPL;
2958 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2960 xmlreader *This = impl_from_IXmlReader(iface);
2962 TRACE("(%p)\n", This);
2964 if (!This->attr_count) return S_FALSE;
2966 if (This->attr)
2967 reader_dec_depth(This);
2969 This->attr = NULL;
2971 /* FIXME: support other node types with 'attributes' like DTD */
2972 if (This->is_empty_element) {
2973 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
2974 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2975 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2977 else {
2978 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2979 if (element) {
2980 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
2981 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2982 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2985 reader_set_strvalue(This, StringValue_Value, &strval_empty);
2987 return S_OK;
2990 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2992 xmlreader *This = impl_from_IXmlReader(iface);
2993 struct attribute *attribute = This->attr;
2994 struct element *element;
2995 UINT length;
2997 TRACE("(%p)->(%p %p)\n", This, name, len);
2999 if (!len)
3000 len = &length;
3002 switch (reader_get_nodetype(This))
3004 case XmlNodeType_Text:
3005 case XmlNodeType_CDATA:
3006 case XmlNodeType_Comment:
3007 case XmlNodeType_Whitespace:
3008 *name = emptyW;
3009 *len = 0;
3010 break;
3011 case XmlNodeType_Element:
3012 case XmlNodeType_EndElement:
3013 element = reader_get_element(This);
3014 if (element->prefix.len)
3016 *name = element->qname.str;
3017 *len = element->qname.len;
3019 else
3021 *name = element->localname.str;
3022 *len = element->localname.len;
3024 break;
3025 case XmlNodeType_Attribute:
3026 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3028 *name = xmlnsW;
3029 *len = 5;
3030 } else if (attribute->prefix.len)
3032 *name = This->strvalues[StringValue_QualifiedName].str;
3033 *len = This->strvalues[StringValue_QualifiedName].len;
3035 else
3037 *name = This->strvalues[StringValue_LocalName].str;
3038 *len = This->strvalues[StringValue_LocalName].len;
3040 break;
3041 default:
3042 *name = This->strvalues[StringValue_QualifiedName].str;
3043 *len = This->strvalues[StringValue_QualifiedName].len;
3044 break;
3047 return S_OK;
3050 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
3052 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
3053 struct ns *ns;
3055 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
3056 if (strval_eq(reader, prefix, &ns->prefix))
3057 return ns;
3060 return NULL;
3063 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3065 if (list_empty(&reader->nsdef))
3066 return NULL;
3068 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3071 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3073 xmlreader *This = impl_from_IXmlReader(iface);
3074 const strval *prefix = &This->strvalues[StringValue_Prefix];
3075 XmlNodeType nodetype;
3076 struct ns *ns;
3077 UINT length;
3079 TRACE("(%p %p %p)\n", iface, uri, len);
3081 if (!len)
3082 len = &length;
3084 *uri = NULL;
3085 *len = 0;
3087 switch ((nodetype = reader_get_nodetype(This)))
3089 case XmlNodeType_Attribute:
3091 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3092 '2','0','0','0','/','x','m','l','n','s','/',0};
3093 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3094 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3095 const strval *local = &This->strvalues[StringValue_LocalName];
3097 /* check for reserved prefixes first */
3098 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
3099 strval_eq(This, prefix, &strval_xmlns))
3101 *uri = xmlns_uriW;
3102 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
3104 else if (strval_eq(This, prefix, &strval_xml)) {
3105 *uri = xml_uriW;
3106 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
3109 if (!*uri) {
3110 ns = reader_lookup_ns(This, prefix);
3111 if (ns) {
3112 *uri = ns->uri.str;
3113 *len = ns->uri.len;
3115 else {
3116 *uri = emptyW;
3117 *len = 0;
3121 break;
3122 case XmlNodeType_Element:
3123 case XmlNodeType_EndElement:
3125 ns = reader_lookup_ns(This, prefix);
3127 /* pick top default ns if any */
3128 if (!ns)
3129 ns = reader_lookup_nsdef(This);
3131 if (ns) {
3132 *uri = ns->uri.str;
3133 *len = ns->uri.len;
3135 else {
3136 *uri = emptyW;
3137 *len = 0;
3140 break;
3141 case XmlNodeType_Text:
3142 case XmlNodeType_CDATA:
3143 case XmlNodeType_ProcessingInstruction:
3144 case XmlNodeType_Comment:
3145 case XmlNodeType_Whitespace:
3146 case XmlNodeType_XmlDeclaration:
3147 *uri = emptyW;
3148 *len = 0;
3149 break;
3150 default:
3151 FIXME("Unhandled node type %d\n", nodetype);
3152 return E_NOTIMPL;
3155 return S_OK;
3158 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3160 xmlreader *This = impl_from_IXmlReader(iface);
3161 struct element *element;
3162 UINT length;
3164 TRACE("(%p)->(%p %p)\n", This, name, len);
3166 if (!len)
3167 len = &length;
3169 switch (reader_get_nodetype(This))
3171 case XmlNodeType_Text:
3172 case XmlNodeType_CDATA:
3173 case XmlNodeType_Comment:
3174 case XmlNodeType_Whitespace:
3175 *name = emptyW;
3176 *len = 0;
3177 break;
3178 case XmlNodeType_Element:
3179 case XmlNodeType_EndElement:
3180 element = reader_get_element(This);
3181 *name = element->localname.str;
3182 *len = element->localname.len;
3183 break;
3184 case XmlNodeType_Attribute:
3185 if (This->attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3187 *name = xmlnsW;
3188 *len = 5;
3190 else
3192 *name = This->strvalues[StringValue_LocalName].str;
3193 *len = This->strvalues[StringValue_LocalName].len;
3195 break;
3196 default:
3197 *name = This->strvalues[StringValue_LocalName].str;
3198 *len = This->strvalues[StringValue_LocalName].len;
3199 break;
3202 return S_OK;
3205 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3207 xmlreader *This = impl_from_IXmlReader(iface);
3208 XmlNodeType nodetype;
3209 UINT length;
3211 TRACE("(%p)->(%p %p)\n", This, ret, len);
3213 if (!len)
3214 len = &length;
3216 *ret = emptyW;
3217 *len = 0;
3219 switch ((nodetype = reader_get_nodetype(This)))
3221 case XmlNodeType_Element:
3222 case XmlNodeType_EndElement:
3223 case XmlNodeType_Attribute:
3225 const strval *prefix = &This->strvalues[StringValue_Prefix];
3226 struct ns *ns;
3228 if (strval_eq(This, prefix, &strval_xml))
3230 *ret = xmlW;
3231 *len = 3;
3233 else if (strval_eq(This, prefix, &strval_xmlns))
3235 *ret = xmlnsW;
3236 *len = 5;
3238 else if ((ns = reader_lookup_ns(This, prefix)))
3240 *ret = ns->prefix.str;
3241 *len = ns->prefix.len;
3244 break;
3246 default:
3250 return S_OK;
3253 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3255 xmlreader *reader = impl_from_IXmlReader(iface);
3256 strval *val = &reader->strvalues[StringValue_Value];
3257 UINT length;
3259 TRACE("(%p)->(%p %p)\n", reader, value, len);
3261 *value = NULL;
3262 if (!len)
3263 len = &length;
3265 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3267 XmlNodeType type;
3268 HRESULT hr;
3270 hr = IXmlReader_Read(iface, &type);
3271 if (FAILED(hr)) return hr;
3273 /* return if still pending, partially read values are not reported */
3274 if (is_reader_pending(reader)) return E_PENDING;
3277 switch (reader_get_nodetype(reader))
3279 case XmlNodeType_XmlDeclaration:
3280 *value = emptyW;
3281 *len = 0;
3282 break;
3283 case XmlNodeType_Attribute:
3285 const strval *local = &reader->strvalues[StringValue_LocalName];
3287 /* For namespace definition attributes return values from namespace list */
3288 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3290 struct ns *ns;
3292 if (!(ns = reader_lookup_ns(reader, local)))
3293 ns = reader_lookup_nsdef(reader);
3295 *value = ns->uri.str;
3296 *len = ns->uri.len;
3297 break;
3300 /* fallthrough */
3301 default:
3302 if (!val->str)
3304 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3305 if (!ptr) return E_OUTOFMEMORY;
3306 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3307 ptr[val->len] = 0;
3308 val->str = ptr;
3310 *value = val->str;
3311 *len = val->len;
3312 break;
3315 return S_OK;
3318 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3320 xmlreader *reader = impl_from_IXmlReader(iface);
3321 strval *val = &reader->strvalues[StringValue_Value];
3322 UINT len;
3324 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3326 /* Value is already allocated, chunked reads are not possible. */
3327 if (val->str) return S_FALSE;
3329 if (val->len)
3331 len = min(chunk_size, val->len);
3332 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3333 val->start += len;
3334 val->len -= len;
3335 if (read) *read = len;
3338 return S_OK;
3341 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3342 LPCWSTR *baseUri,
3343 UINT *baseUri_length)
3345 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3346 return E_NOTIMPL;
3349 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3351 FIXME("(%p): stub\n", iface);
3352 return FALSE;
3355 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3357 xmlreader *This = impl_from_IXmlReader(iface);
3358 TRACE("(%p)\n", This);
3359 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3360 when current node is start tag of an element */
3361 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3364 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3366 xmlreader *This = impl_from_IXmlReader(iface);
3367 const struct element *element;
3369 TRACE("(%p %p)\n", This, line_number);
3371 if (!line_number)
3372 return E_INVALIDARG;
3374 switch (reader_get_nodetype(This))
3376 case XmlNodeType_Element:
3377 case XmlNodeType_EndElement:
3378 element = reader_get_element(This);
3379 *line_number = element->position.line_number;
3380 break;
3381 case XmlNodeType_Attribute:
3382 *line_number = This->attr->position.line_number;
3383 break;
3384 case XmlNodeType_Whitespace:
3385 case XmlNodeType_XmlDeclaration:
3386 *line_number = This->empty_element.position.line_number;
3387 break;
3388 default:
3389 *line_number = This->position.line_number;
3390 break;
3393 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3396 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3398 xmlreader *This = impl_from_IXmlReader(iface);
3399 const struct element *element;
3401 TRACE("(%p %p)\n", This, line_position);
3403 if (!line_position)
3404 return E_INVALIDARG;
3406 switch (reader_get_nodetype(This))
3408 case XmlNodeType_Element:
3409 case XmlNodeType_EndElement:
3410 element = reader_get_element(This);
3411 *line_position = element->position.line_position;
3412 break;
3413 case XmlNodeType_Attribute:
3414 *line_position = This->attr->position.line_position;
3415 break;
3416 case XmlNodeType_Whitespace:
3417 case XmlNodeType_XmlDeclaration:
3418 *line_position = This->empty_element.position.line_position;
3419 break;
3420 default:
3421 *line_position = This->position.line_position;
3422 break;
3425 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3428 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3430 xmlreader *This = impl_from_IXmlReader(iface);
3432 TRACE("(%p)->(%p)\n", This, count);
3434 if (!count) return E_INVALIDARG;
3436 *count = This->attr_count;
3437 return S_OK;
3440 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3442 xmlreader *This = impl_from_IXmlReader(iface);
3443 TRACE("(%p)->(%p)\n", This, depth);
3444 *depth = This->depth;
3445 return S_OK;
3448 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3450 xmlreader *This = impl_from_IXmlReader(iface);
3451 TRACE("(%p)\n", iface);
3452 return This->state == XmlReadState_EndOfFile;
3455 static const struct IXmlReaderVtbl xmlreader_vtbl =
3457 xmlreader_QueryInterface,
3458 xmlreader_AddRef,
3459 xmlreader_Release,
3460 xmlreader_SetInput,
3461 xmlreader_GetProperty,
3462 xmlreader_SetProperty,
3463 xmlreader_Read,
3464 xmlreader_GetNodeType,
3465 xmlreader_MoveToFirstAttribute,
3466 xmlreader_MoveToNextAttribute,
3467 xmlreader_MoveToAttributeByName,
3468 xmlreader_MoveToElement,
3469 xmlreader_GetQualifiedName,
3470 xmlreader_GetNamespaceUri,
3471 xmlreader_GetLocalName,
3472 xmlreader_GetPrefix,
3473 xmlreader_GetValue,
3474 xmlreader_ReadValueChunk,
3475 xmlreader_GetBaseUri,
3476 xmlreader_IsDefault,
3477 xmlreader_IsEmptyElement,
3478 xmlreader_GetLineNumber,
3479 xmlreader_GetLinePosition,
3480 xmlreader_GetAttributeCount,
3481 xmlreader_GetDepth,
3482 xmlreader_IsEOF
3485 /** IXmlReaderInput **/
3486 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3488 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3490 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3492 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3493 IsEqualGUID(riid, &IID_IUnknown))
3495 *ppvObject = iface;
3497 else
3499 WARN("interface %s not implemented\n", debugstr_guid(riid));
3500 *ppvObject = NULL;
3501 return E_NOINTERFACE;
3504 IUnknown_AddRef(iface);
3506 return S_OK;
3509 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3511 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3512 ULONG ref = InterlockedIncrement(&This->ref);
3513 TRACE("(%p)->(%d)\n", This, ref);
3514 return ref;
3517 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3519 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3520 LONG ref = InterlockedDecrement(&This->ref);
3522 TRACE("(%p)->(%d)\n", This, ref);
3524 if (ref == 0)
3526 IMalloc *imalloc = This->imalloc;
3527 if (This->input) IUnknown_Release(This->input);
3528 if (This->stream) ISequentialStream_Release(This->stream);
3529 if (This->buffer) free_input_buffer(This->buffer);
3530 readerinput_free(This, This->baseuri);
3531 readerinput_free(This, This);
3532 if (imalloc) IMalloc_Release(imalloc);
3535 return ref;
3538 static const struct IUnknownVtbl xmlreaderinputvtbl =
3540 xmlreaderinput_QueryInterface,
3541 xmlreaderinput_AddRef,
3542 xmlreaderinput_Release
3545 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3547 xmlreader *reader;
3548 int i;
3550 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3552 if (!IsEqualGUID(riid, &IID_IXmlReader))
3554 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3555 return E_FAIL;
3558 if (imalloc)
3559 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3560 else
3561 reader = heap_alloc(sizeof(*reader));
3562 if (!reader)
3563 return E_OUTOFMEMORY;
3565 memset(reader, 0, sizeof(*reader));
3566 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3567 reader->ref = 1;
3568 reader->state = XmlReadState_Closed;
3569 reader->instate = XmlReadInState_Initial;
3570 reader->resumestate = XmlReadResumeState_Initial;
3571 reader->dtdmode = DtdProcessing_Prohibit;
3572 reader->imalloc = imalloc;
3573 if (imalloc) IMalloc_AddRef(imalloc);
3574 reader->nodetype = XmlNodeType_None;
3575 list_init(&reader->attrs);
3576 list_init(&reader->nsdef);
3577 list_init(&reader->ns);
3578 list_init(&reader->elements);
3579 reader->max_depth = 256;
3581 for (i = 0; i < StringValue_Last; i++)
3582 reader->strvalues[i] = strval_empty;
3584 *obj = &reader->IXmlReader_iface;
3586 TRACE("returning iface %p\n", *obj);
3588 return S_OK;
3591 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3592 IMalloc *imalloc,
3593 LPCWSTR encoding,
3594 BOOL hint,
3595 LPCWSTR base_uri,
3596 IXmlReaderInput **ppInput)
3598 xmlreaderinput *readerinput;
3599 HRESULT hr;
3601 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3602 hint, wine_dbgstr_w(base_uri), ppInput);
3604 if (!stream || !ppInput) return E_INVALIDARG;
3606 if (imalloc)
3607 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3608 else
3609 readerinput = heap_alloc(sizeof(*readerinput));
3610 if(!readerinput) return E_OUTOFMEMORY;
3612 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3613 readerinput->ref = 1;
3614 readerinput->imalloc = imalloc;
3615 readerinput->stream = NULL;
3616 if (imalloc) IMalloc_AddRef(imalloc);
3617 readerinput->encoding = parse_encoding_name(encoding, -1);
3618 readerinput->hint = hint;
3619 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3620 readerinput->pending = 0;
3622 hr = alloc_input_buffer(readerinput);
3623 if (hr != S_OK)
3625 readerinput_free(readerinput, readerinput->baseuri);
3626 readerinput_free(readerinput, readerinput);
3627 if (imalloc) IMalloc_Release(imalloc);
3628 return hr;
3630 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3632 *ppInput = &readerinput->IXmlReaderInput_iface;
3634 TRACE("returning iface %p\n", *ppInput);
3636 return S_OK;