xmllite: Improve GetValue() for namespace definition nodes.
[wine.git] / dlls / xmllite / reader.c
blob47649f5a8f7d7633a1b8e882d7a8465b0c89db68
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parsing state of the reader. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,   /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records which construct was being parsed when an input problem
   interrupted parsing; the reader uses it to resume from that point. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into the reader's table of saved input offsets, used to resume
   from a particular input position. */
typedef enum
{
    XmlReadResume_Name = 0, /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local,    /* local for QName */
    XmlReadResume_Body,     /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last      /* number of slots, not a valid index */
} XmlReaderResume;
/* Indices of the string values the reader keeps for the current node. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last           /* number of slots, not a valid index */
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 xml_encoding get_encoding_from_codepage(UINT codepage)
165 int i;
166 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
168 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
170 return XmlEncoding_Unknown;
173 typedef struct
175 char *data;
176 UINT cur;
177 unsigned int allocated;
178 unsigned int written;
179 } encoded_buffer;
181 typedef struct input_buffer input_buffer;
183 typedef struct
185 IXmlReaderInput IXmlReaderInput_iface;
186 LONG ref;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
188 IUnknown *input;
189 IMalloc *imalloc;
190 xml_encoding encoding;
191 BOOL hint;
192 WCHAR *baseuri;
193 /* stream reference set after SetInput() call from reader,
194 stored as sequential stream, cause currently
195 optimizations possible with IStream aren't implemented */
196 ISequentialStream *stream;
197 input_buffer *buffer;
198 unsigned int pending : 1;
199 } xmlreaderinput;
201 static const struct IUnknownVtbl xmlreaderinputvtbl;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
211 typedef struct
213 WCHAR *str; /* allocated null-terminated string */
214 UINT len; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start; /* input position where value starts */
216 } strval;
218 static WCHAR emptyW[] = {0};
219 static WCHAR xmlW[] = {'x','m','l',0};
220 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
221 static const strval strval_empty = { emptyW };
222 static const strval strval_xml = { xmlW, 3 };
223 static const strval strval_xmlns = { xmlnsW, 5 };
225 struct attribute
227 struct list entry;
228 strval prefix;
229 strval localname;
230 strval value;
233 struct element
235 struct list entry;
236 strval prefix;
237 strval localname;
238 strval qname;
241 struct ns
243 struct list entry;
244 strval prefix;
245 strval uri;
246 struct element *element;
249 typedef struct
251 IXmlReader IXmlReader_iface;
252 LONG ref;
253 xmlreaderinput *input;
254 IMalloc *imalloc;
255 XmlReadState state;
256 XmlReaderInternalState instate;
257 XmlReaderResumeState resumestate;
258 XmlNodeType nodetype;
259 DtdProcessing dtdmode;
260 IXmlResolver *resolver;
261 IUnknown *mlang;
262 UINT line, pos; /* reader position in XML stream */
263 struct list attrs; /* attributes list for current node */
264 struct attribute *attr; /* current attribute */
265 UINT attr_count;
266 struct list nsdef;
267 struct list ns;
268 struct list elements;
269 strval strvalues[StringValue_Last];
270 UINT depth;
271 UINT max_depth;
272 BOOL is_empty_element;
273 struct element empty_element;
274 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
275 } xmlreader;
277 struct input_buffer
279 encoded_buffer utf16;
280 encoded_buffer encoded;
281 UINT code_page;
282 xmlreaderinput *input;
285 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
287 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
290 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
292 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
295 /* reader memory allocation functions */
296 static inline void *reader_alloc(xmlreader *reader, size_t len)
298 return m_alloc(reader->imalloc, len);
301 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
303 void *ret = reader_alloc(reader, len);
304 if (ret)
305 memset(ret, 0, len);
306 return ret;
309 static inline void reader_free(xmlreader *reader, void *mem)
311 m_free(reader->imalloc, mem);
314 /* Just return pointer from offset, no attempt to read more. */
315 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
317 encoded_buffer *buffer = &reader->input->buffer->utf16;
318 return (WCHAR*)buffer->data + offset;
321 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
323 return v->str ? v->str : reader_get_ptr2(reader, v->start);
326 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
328 *dest = *src;
330 if (src->str != strval_empty.str)
332 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
333 if (!dest->str) return E_OUTOFMEMORY;
334 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
335 dest->str[dest->len] = 0;
336 dest->start = 0;
339 return S_OK;
342 /* reader input memory allocation functions */
343 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
345 return m_alloc(input->imalloc, len);
348 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
350 return m_realloc(input->imalloc, mem, len);
353 static inline void readerinput_free(xmlreaderinput *input, void *mem)
355 m_free(input->imalloc, mem);
358 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
360 LPWSTR ret = NULL;
362 if(str) {
363 DWORD size;
365 size = (strlenW(str)+1)*sizeof(WCHAR);
366 ret = readerinput_alloc(input, size);
367 if (ret) memcpy(ret, str, size);
370 return ret;
373 static void reader_clear_attrs(xmlreader *reader)
375 struct attribute *attr, *attr2;
376 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
378 reader_free(reader, attr);
380 list_init(&reader->attrs);
381 reader->attr_count = 0;
382 reader->attr = NULL;
385 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
386 while we are on a node with attributes */
387 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
389 struct attribute *attr;
391 attr = reader_alloc(reader, sizeof(*attr));
392 if (!attr) return E_OUTOFMEMORY;
394 if (prefix)
395 attr->prefix = *prefix;
396 else
397 memset(&attr->prefix, 0, sizeof(attr->prefix));
398 attr->localname = *localname;
399 attr->value = *value;
400 list_add_tail(&reader->attrs, &attr->entry);
401 reader->attr_count++;
403 return S_OK;
406 /* This one frees stored string value if needed */
407 static void reader_free_strvalued(xmlreader *reader, strval *v)
409 if (v->str != strval_empty.str)
411 reader_free(reader, v->str);
412 *v = strval_empty;
416 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
418 v->start = start;
419 v->len = len;
420 v->str = NULL;
423 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
425 return debugstr_wn(reader_get_strptr(reader, v), v->len);
428 /* used to initialize from constant string */
429 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
431 v->start = 0;
432 v->len = len;
433 v->str = str;
436 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
438 reader_free_strvalued(reader, &reader->strvalues[type]);
441 static void reader_free_strvalues(xmlreader *reader)
443 int type;
444 for (type = 0; type < StringValue_Last; type++)
445 reader_free_strvalue(reader, type);
448 /* This helper should only be used to test if strings are the same,
449 it doesn't try to sort. */
450 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
452 if (str1->len != str2->len) return 0;
453 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
456 static void reader_clear_elements(xmlreader *reader)
458 struct element *elem, *elem2;
459 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
461 reader_free_strvalued(reader, &elem->prefix);
462 reader_free_strvalued(reader, &elem->localname);
463 reader_free_strvalued(reader, &elem->qname);
464 reader_free(reader, elem);
466 list_init(&reader->elements);
467 reader->is_empty_element = FALSE;
470 static HRESULT reader_inc_depth(xmlreader *reader)
472 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
473 return S_OK;
476 static void reader_dec_depth(xmlreader *reader)
478 if (reader->depth > 1) reader->depth--;
481 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
483 struct ns *ns;
484 HRESULT hr;
486 ns = reader_alloc(reader, sizeof(*ns));
487 if (!ns) return E_OUTOFMEMORY;
489 if (def)
490 memset(&ns->prefix, 0, sizeof(ns->prefix));
491 else {
492 hr = reader_strvaldup(reader, prefix, &ns->prefix);
493 if (FAILED(hr)) {
494 reader_free(reader, ns);
495 return hr;
499 hr = reader_strvaldup(reader, uri, &ns->uri);
500 if (FAILED(hr)) {
501 reader_free_strvalued(reader, &ns->prefix);
502 reader_free(reader, ns);
503 return hr;
506 ns->element = NULL;
507 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
508 return hr;
511 static void reader_free_element(xmlreader *reader, struct element *element)
513 reader_free_strvalued(reader, &element->prefix);
514 reader_free_strvalued(reader, &element->localname);
515 reader_free_strvalued(reader, &element->qname);
516 reader_free(reader, element);
519 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
521 struct ns *ns;
523 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
524 if (ns->element)
525 break;
526 ns->element = element;
529 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
530 if (ns->element)
531 break;
532 ns->element = element;
536 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
537 strval *qname)
539 struct element *element;
540 HRESULT hr;
542 if (!list_empty(&reader->elements))
544 hr = reader_inc_depth(reader);
545 if (FAILED(hr))
546 return hr;
549 element = reader_alloc_zero(reader, sizeof(*element));
550 if (!element)
551 goto failed;
553 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
554 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
555 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
557 reader_free_element(reader, element);
558 goto failed;
561 list_add_head(&reader->elements, &element->entry);
562 reader_mark_ns_nodes(reader, element);
563 reader->is_empty_element = FALSE;
565 failed:
566 reader_dec_depth(reader);
567 return hr;
570 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
572 struct ns *ns, *ns2;
574 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
575 if (ns->element != element)
576 break;
578 list_remove(&ns->entry);
579 reader_free_strvalued(reader, &ns->prefix);
580 reader_free_strvalued(reader, &ns->uri);
581 reader_free(reader, ns);
584 if (!list_empty(&reader->nsdef)) {
585 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
586 if (ns->element == element) {
587 list_remove(&ns->entry);
588 reader_free_strvalued(reader, &ns->prefix);
589 reader_free_strvalued(reader, &ns->uri);
590 reader_free(reader, ns);
595 static void reader_pop_element(xmlreader *reader)
597 struct element *element;
599 if (list_empty(&reader->elements))
600 return;
602 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
603 list_remove(&element->entry);
605 reader_pop_ns_nodes(reader, element);
606 reader_free_element(reader, element);
607 reader_dec_depth(reader);
609 /* It was a root element, the rest is expected as Misc */
610 if (list_empty(&reader->elements))
611 reader->instate = XmlReadInState_MiscEnd;
614 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
615 means node value is to be determined. */
616 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
618 strval *v = &reader->strvalues[type];
620 reader_free_strvalue(reader, type);
621 if (!value)
623 v->str = NULL;
624 v->start = 0;
625 v->len = 0;
626 return;
629 if (value->str == strval_empty.str)
630 *v = *value;
631 else
633 if (type == StringValue_Value)
635 /* defer allocation for value string */
636 v->str = NULL;
637 v->start = value->start;
638 v->len = value->len;
640 else
642 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
643 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
644 v->str[value->len] = 0;
645 v->len = value->len;
650 static inline int is_reader_pending(xmlreader *reader)
652 return reader->input->pending;
655 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
657 const int initial_len = 0x2000;
658 buffer->data = readerinput_alloc(input, initial_len);
659 if (!buffer->data) return E_OUTOFMEMORY;
661 memset(buffer->data, 0, 4);
662 buffer->cur = 0;
663 buffer->allocated = initial_len;
664 buffer->written = 0;
666 return S_OK;
669 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
671 readerinput_free(input, buffer->data);
674 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
676 if (encoding == XmlEncoding_Unknown)
678 FIXME("unsupported encoding %d\n", encoding);
679 return E_NOTIMPL;
682 *cp = xml_encoding_map[encoding].cp;
684 return S_OK;
687 xml_encoding parse_encoding_name(const WCHAR *name, int len)
689 int min, max, n, c;
691 if (!name) return XmlEncoding_Unknown;
693 min = 0;
694 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
696 while (min <= max)
698 n = (min+max)/2;
700 if (len != -1)
701 c = strncmpiW(xml_encoding_map[n].name, name, len);
702 else
703 c = strcmpiW(xml_encoding_map[n].name, name);
704 if (!c)
705 return xml_encoding_map[n].enc;
707 if (c > 0)
708 max = n-1;
709 else
710 min = n+1;
713 return XmlEncoding_Unknown;
716 static HRESULT alloc_input_buffer(xmlreaderinput *input)
718 input_buffer *buffer;
719 HRESULT hr;
721 input->buffer = NULL;
723 buffer = readerinput_alloc(input, sizeof(*buffer));
724 if (!buffer) return E_OUTOFMEMORY;
726 buffer->input = input;
727 buffer->code_page = ~0; /* code page is unknown at this point */
728 hr = init_encoded_buffer(input, &buffer->utf16);
729 if (hr != S_OK) {
730 readerinput_free(input, buffer);
731 return hr;
734 hr = init_encoded_buffer(input, &buffer->encoded);
735 if (hr != S_OK) {
736 free_encoded_buffer(input, &buffer->utf16);
737 readerinput_free(input, buffer);
738 return hr;
741 input->buffer = buffer;
742 return S_OK;
745 static void free_input_buffer(input_buffer *buffer)
747 free_encoded_buffer(buffer->input, &buffer->encoded);
748 free_encoded_buffer(buffer->input, &buffer->utf16);
749 readerinput_free(buffer->input, buffer);
752 static void readerinput_release_stream(xmlreaderinput *readerinput)
754 if (readerinput->stream) {
755 ISequentialStream_Release(readerinput->stream);
756 readerinput->stream = NULL;
760 /* Queries already stored interface for IStream/ISequentialStream.
761 Interface supplied on creation will be overwritten */
762 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
764 HRESULT hr;
766 readerinput_release_stream(readerinput);
767 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
768 if (hr != S_OK)
769 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
771 return hr;
774 /* reads a chunk to raw buffer */
775 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
777 encoded_buffer *buffer = &readerinput->buffer->encoded;
778 /* to make sure aligned length won't exceed allocated length */
779 ULONG len = buffer->allocated - buffer->written - 4;
780 ULONG read;
781 HRESULT hr;
783 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
784 variable width encodings like UTF-8 */
785 len = (len + 3) & ~3;
786 /* try to use allocated space or grow */
787 if (buffer->allocated - buffer->written < len)
789 buffer->allocated *= 2;
790 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
791 len = buffer->allocated - buffer->written;
794 read = 0;
795 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
796 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
797 readerinput->pending = hr == E_PENDING;
798 if (FAILED(hr)) return hr;
799 buffer->written += read;
801 return hr;
804 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
805 static void readerinput_grow(xmlreaderinput *readerinput, int length)
807 encoded_buffer *buffer = &readerinput->buffer->utf16;
809 length *= sizeof(WCHAR);
810 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
811 if (buffer->allocated < buffer->written + length + 4)
813 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
814 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
815 buffer->allocated = grown_size;
819 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
821 static const char startA[] = {'<','?'};
822 static const char commentA[] = {'<','!'};
823 encoded_buffer *buffer = &readerinput->buffer->encoded;
824 unsigned char *ptr = (unsigned char*)buffer->data;
826 return !memcmp(buffer->data, startA, sizeof(startA)) ||
827 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
828 /* test start byte */
829 (ptr[0] == '<' &&
831 (ptr[1] && (ptr[1] <= 0x7f)) ||
832 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
833 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
834 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
838 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
840 encoded_buffer *buffer = &readerinput->buffer->encoded;
841 static const WCHAR startW[] = {'<','?'};
842 static const WCHAR commentW[] = {'<','!'};
843 static const char utf8bom[] = {0xef,0xbb,0xbf};
844 static const char utf16lebom[] = {0xff,0xfe};
846 *enc = XmlEncoding_Unknown;
848 if (buffer->written <= 3)
850 HRESULT hr = readerinput_growraw(readerinput);
851 if (FAILED(hr)) return hr;
852 if (buffer->written <= 3) return MX_E_INPUTEND;
855 /* try start symbols if we have enough data to do that, input buffer should contain
856 first chunk already */
857 if (readerinput_is_utf8(readerinput))
858 *enc = XmlEncoding_UTF8;
859 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
860 !memcmp(buffer->data, commentW, sizeof(commentW)))
861 *enc = XmlEncoding_UTF16;
862 /* try with BOM now */
863 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
865 buffer->cur += sizeof(utf8bom);
866 *enc = XmlEncoding_UTF8;
868 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
870 buffer->cur += sizeof(utf16lebom);
871 *enc = XmlEncoding_UTF16;
874 return S_OK;
877 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
879 encoded_buffer *buffer = &readerinput->buffer->encoded;
880 int len = buffer->written;
882 /* complete single byte char */
883 if (!(buffer->data[len-1] & 0x80)) return len;
885 /* find start byte of multibyte char */
886 while (--len && !(buffer->data[len] & 0xc0))
889 return len;
892 /* Returns byte length of complete char sequence for buffer code page,
893 it's relative to current buffer position which is currently used for BOM handling
894 only. */
895 static int readerinput_get_convlen(xmlreaderinput *readerinput)
897 encoded_buffer *buffer = &readerinput->buffer->encoded;
898 int len;
900 if (readerinput->buffer->code_page == CP_UTF8)
901 len = readerinput_get_utf8_convlen(readerinput);
902 else
903 len = buffer->written;
905 TRACE("%d\n", len - buffer->cur);
906 return len - buffer->cur;
909 /* It's possible that raw buffer has some leftovers from last conversion - some char
910 sequence that doesn't represent a full code point. Length argument should be calculated with
911 readerinput_get_convlen(), if it's -1 it will be calculated here. */
912 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
914 encoded_buffer *buffer = &readerinput->buffer->encoded;
916 if (len == -1)
917 len = readerinput_get_convlen(readerinput);
919 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
920 /* everything below cur is lost too */
921 buffer->written -= len + buffer->cur;
922 /* after this point we don't need cur offset really,
923 it's used only to mark where actual data begins when first chunk is read */
924 buffer->cur = 0;
927 /* note that raw buffer content is kept */
928 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
930 encoded_buffer *src = &readerinput->buffer->encoded;
931 encoded_buffer *dest = &readerinput->buffer->utf16;
932 int len, dest_len;
933 HRESULT hr;
934 WCHAR *ptr;
935 UINT cp;
937 hr = get_code_page(enc, &cp);
938 if (FAILED(hr)) return;
940 readerinput->buffer->code_page = cp;
941 len = readerinput_get_convlen(readerinput);
943 TRACE("switching to cp %d\n", cp);
945 /* just copy in this case */
946 if (enc == XmlEncoding_UTF16)
948 readerinput_grow(readerinput, len);
949 memcpy(dest->data, src->data + src->cur, len);
950 dest->written += len*sizeof(WCHAR);
951 return;
954 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
955 readerinput_grow(readerinput, dest_len);
956 ptr = (WCHAR*)dest->data;
957 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
958 ptr[dest_len] = 0;
959 dest->written += dest_len*sizeof(WCHAR);
962 /* shrinks parsed data a buffer begins with */
963 static void reader_shrink(xmlreader *reader)
965 encoded_buffer *buffer = &reader->input->buffer->utf16;
967 /* avoid to move too often using threshold shrink length */
968 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
970 buffer->written -= buffer->cur*sizeof(WCHAR);
971 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
972 buffer->cur = 0;
973 *(WCHAR*)&buffer->data[buffer->written] = 0;
977 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
978 It won't attempt to shrink but will grow destination buffer if needed */
979 static HRESULT reader_more(xmlreader *reader)
981 xmlreaderinput *readerinput = reader->input;
982 encoded_buffer *src = &readerinput->buffer->encoded;
983 encoded_buffer *dest = &readerinput->buffer->utf16;
984 UINT cp = readerinput->buffer->code_page;
985 int len, dest_len;
986 HRESULT hr;
987 WCHAR *ptr;
989 /* get some raw data from stream first */
990 hr = readerinput_growraw(readerinput);
991 len = readerinput_get_convlen(readerinput);
993 /* just copy for UTF-16 case */
994 if (cp == ~0)
996 readerinput_grow(readerinput, len);
997 memcpy(dest->data + dest->written, src->data + src->cur, len);
998 dest->written += len*sizeof(WCHAR);
999 return hr;
1002 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1003 readerinput_grow(readerinput, dest_len);
1004 ptr = (WCHAR*)(dest->data + dest->written);
1005 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1006 ptr[dest_len] = 0;
1007 dest->written += dest_len*sizeof(WCHAR);
1008 /* get rid of processed data */
1009 readerinput_shrinkraw(readerinput, len);
1011 return hr;
1014 static inline UINT reader_get_cur(xmlreader *reader)
1016 return reader->input->buffer->utf16.cur;
1019 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1021 encoded_buffer *buffer = &reader->input->buffer->utf16;
1022 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1023 if (!*ptr) reader_more(reader);
1024 return (WCHAR*)buffer->data + buffer->cur;
1027 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1029 int i=0;
1030 const WCHAR *ptr = reader_get_ptr(reader);
1031 while (str[i])
1033 if (!ptr[i])
1035 reader_more(reader);
1036 ptr = reader_get_ptr(reader);
1038 if (str[i] != ptr[i])
1039 return ptr[i] - str[i];
1040 i++;
1042 return 0;
1045 /* moves cursor n WCHARs forward */
1046 static void reader_skipn(xmlreader *reader, int n)
1048 encoded_buffer *buffer = &reader->input->buffer->utf16;
1049 const WCHAR *ptr = reader_get_ptr(reader);
1051 while (*ptr++ && n--)
1053 buffer->cur++;
1054 reader->pos++;
1058 static inline BOOL is_wchar_space(WCHAR ch)
1060 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1063 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1064 static int reader_skipspaces(xmlreader *reader)
1066 encoded_buffer *buffer = &reader->input->buffer->utf16;
1067 const WCHAR *ptr = reader_get_ptr(reader);
1068 UINT start = reader_get_cur(reader);
1070 while (is_wchar_space(*ptr))
1072 if (*ptr == '\r')
1073 reader->pos = 0;
1074 else if (*ptr == '\n')
1076 reader->line++;
1077 reader->pos = 0;
1079 else
1080 reader->pos++;
1082 buffer->cur++;
1083 ptr = reader_get_ptr(reader);
1086 return reader_get_cur(reader) - start;
1089 /* [26] VersionNum ::= '1.' [0-9]+ */
1090 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1092 static const WCHAR onedotW[] = {'1','.',0};
1093 WCHAR *ptr, *ptr2;
1094 UINT start;
1096 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1098 start = reader_get_cur(reader);
1099 /* skip "1." */
1100 reader_skipn(reader, 2);
1102 ptr2 = ptr = reader_get_ptr(reader);
1103 while (*ptr >= '0' && *ptr <= '9')
1105 reader_skipn(reader, 1);
1106 ptr = reader_get_ptr(reader);
1109 if (ptr2 == ptr) return WC_E_DIGIT;
1110 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1111 TRACE("version=%s\n", debug_strval(reader, val));
1112 return S_OK;
1115 /* [25] Eq ::= S? '=' S? */
1116 static HRESULT reader_parse_eq(xmlreader *reader)
1118 static const WCHAR eqW[] = {'=',0};
1119 reader_skipspaces(reader);
1120 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1121 /* skip '=' */
1122 reader_skipn(reader, 1);
1123 reader_skipspaces(reader);
1124 return S_OK;
1127 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1128 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1130 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1131 strval val, name;
1132 HRESULT hr;
1134 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1136 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1137 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1138 /* skip 'version' */
1139 reader_skipn(reader, 7);
1141 hr = reader_parse_eq(reader);
1142 if (FAILED(hr)) return hr;
1144 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1145 return WC_E_QUOTE;
1146 /* skip "'"|'"' */
1147 reader_skipn(reader, 1);
1149 hr = reader_parse_versionnum(reader, &val);
1150 if (FAILED(hr)) return hr;
1152 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1153 return WC_E_QUOTE;
1155 /* skip "'"|'"' */
1156 reader_skipn(reader, 1);
1158 return reader_add_attr(reader, NULL, &name, &val);
1161 /* ([A-Za-z0-9._] | '-') */
1162 static inline BOOL is_wchar_encname(WCHAR ch)
1164 return ((ch >= 'A' && ch <= 'Z') ||
1165 (ch >= 'a' && ch <= 'z') ||
1166 (ch >= '0' && ch <= '9') ||
1167 (ch == '.') || (ch == '_') ||
1168 (ch == '-'));
1171 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1172 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1174 WCHAR *start = reader_get_ptr(reader), *ptr;
1175 xml_encoding enc;
1176 int len;
1178 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1179 return WC_E_ENCNAME;
1181 val->start = reader_get_cur(reader);
1183 ptr = start;
1184 while (is_wchar_encname(*++ptr))
1187 len = ptr - start;
1188 enc = parse_encoding_name(start, len);
1189 TRACE("encoding name %s\n", debugstr_wn(start, len));
1190 val->str = start;
1191 val->len = len;
1193 if (enc == XmlEncoding_Unknown)
1194 return WC_E_ENCNAME;
1196 /* skip encoding name */
1197 reader_skipn(reader, len);
1198 return S_OK;
1201 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1202 static HRESULT reader_parse_encdecl(xmlreader *reader)
1204 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1205 strval name, val;
1206 HRESULT hr;
1208 if (!reader_skipspaces(reader)) return S_FALSE;
1210 if (reader_cmp(reader, encodingW)) return S_FALSE;
1211 name.str = reader_get_ptr(reader);
1212 name.start = reader_get_cur(reader);
1213 name.len = 8;
1214 /* skip 'encoding' */
1215 reader_skipn(reader, 8);
1217 hr = reader_parse_eq(reader);
1218 if (FAILED(hr)) return hr;
1220 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1221 return WC_E_QUOTE;
1222 /* skip "'"|'"' */
1223 reader_skipn(reader, 1);
1225 hr = reader_parse_encname(reader, &val);
1226 if (FAILED(hr)) return hr;
1228 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1229 return WC_E_QUOTE;
1231 /* skip "'"|'"' */
1232 reader_skipn(reader, 1);
1234 return reader_add_attr(reader, NULL, &name, &val);
1237 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1238 static HRESULT reader_parse_sddecl(xmlreader *reader)
1240 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1241 static const WCHAR yesW[] = {'y','e','s',0};
1242 static const WCHAR noW[] = {'n','o',0};
1243 strval name, val;
1244 UINT start;
1245 HRESULT hr;
1247 if (!reader_skipspaces(reader)) return S_FALSE;
1249 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1250 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1251 /* skip 'standalone' */
1252 reader_skipn(reader, 10);
1254 hr = reader_parse_eq(reader);
1255 if (FAILED(hr)) return hr;
1257 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1258 return WC_E_QUOTE;
1259 /* skip "'"|'"' */
1260 reader_skipn(reader, 1);
1262 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1263 return WC_E_XMLDECL;
1265 start = reader_get_cur(reader);
1266 /* skip 'yes'|'no' */
1267 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1268 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1269 TRACE("standalone=%s\n", debug_strval(reader, &val));
1271 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1272 return WC_E_QUOTE;
1273 /* skip "'"|'"' */
1274 reader_skipn(reader, 1);
1276 return reader_add_attr(reader, NULL, &name, &val);
1279 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1280 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1282 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1283 static const WCHAR declcloseW[] = {'?','>',0};
1284 HRESULT hr;
1286 /* check if we have "<?xml " */
1287 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1289 reader_skipn(reader, 5);
1290 hr = reader_parse_versioninfo(reader);
1291 if (FAILED(hr))
1292 return hr;
1294 hr = reader_parse_encdecl(reader);
1295 if (FAILED(hr))
1296 return hr;
1298 hr = reader_parse_sddecl(reader);
1299 if (FAILED(hr))
1300 return hr;
1302 reader_skipspaces(reader);
1303 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1304 reader_skipn(reader, 2);
1306 reader_inc_depth(reader);
1307 reader->nodetype = XmlNodeType_XmlDeclaration;
1308 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1309 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1310 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1312 return S_OK;
1315 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1316 static HRESULT reader_parse_comment(xmlreader *reader)
1318 WCHAR *ptr;
1319 UINT start;
1321 if (reader->resumestate == XmlReadResumeState_Comment)
1323 start = reader->resume[XmlReadResume_Body];
1324 ptr = reader_get_ptr(reader);
1326 else
1328 /* skip '<!--' */
1329 reader_skipn(reader, 4);
1330 reader_shrink(reader);
1331 ptr = reader_get_ptr(reader);
1332 start = reader_get_cur(reader);
1333 reader->nodetype = XmlNodeType_Comment;
1334 reader->resume[XmlReadResume_Body] = start;
1335 reader->resumestate = XmlReadResumeState_Comment;
1336 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1337 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1338 reader_set_strvalue(reader, StringValue_Value, NULL);
1341 /* will exit when there's no more data, it won't attempt to
1342 read more from stream */
1343 while (*ptr)
1345 if (ptr[0] == '-')
1347 if (ptr[1] == '-')
1349 if (ptr[2] == '>')
1351 strval value;
1353 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1354 TRACE("%s\n", debug_strval(reader, &value));
1356 /* skip rest of markup '->' */
1357 reader_skipn(reader, 3);
1359 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1360 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1361 reader_set_strvalue(reader, StringValue_Value, &value);
1362 reader->resume[XmlReadResume_Body] = 0;
1363 reader->resumestate = XmlReadResumeState_Initial;
1364 return S_OK;
1366 else
1367 return WC_E_COMMENT;
1371 reader_skipn(reader, 1);
1372 ptr++;
1375 return S_OK;
1378 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1379 static inline BOOL is_char(WCHAR ch)
1381 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1382 (ch >= 0x20 && ch <= 0xd7ff) ||
1383 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1384 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1385 (ch >= 0xe000 && ch <= 0xfffd);
1388 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1389 static inline BOOL is_pubchar(WCHAR ch)
1391 return (ch == ' ') ||
1392 (ch >= 'a' && ch <= 'z') ||
1393 (ch >= 'A' && ch <= 'Z') ||
1394 (ch >= '0' && ch <= '9') ||
1395 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1396 (ch == '=') || (ch == '?') ||
1397 (ch == '@') || (ch == '!') ||
1398 (ch >= '#' && ch <= '%') || /* #$% */
1399 (ch == '_') || (ch == '\r') || (ch == '\n');
1402 static inline BOOL is_namestartchar(WCHAR ch)
1404 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1405 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1406 (ch >= 0xc0 && ch <= 0xd6) ||
1407 (ch >= 0xd8 && ch <= 0xf6) ||
1408 (ch >= 0xf8 && ch <= 0x2ff) ||
1409 (ch >= 0x370 && ch <= 0x37d) ||
1410 (ch >= 0x37f && ch <= 0x1fff) ||
1411 (ch >= 0x200c && ch <= 0x200d) ||
1412 (ch >= 0x2070 && ch <= 0x218f) ||
1413 (ch >= 0x2c00 && ch <= 0x2fef) ||
1414 (ch >= 0x3001 && ch <= 0xd7ff) ||
1415 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1416 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1417 (ch >= 0xf900 && ch <= 0xfdcf) ||
1418 (ch >= 0xfdf0 && ch <= 0xfffd);
1421 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1422 static inline BOOL is_ncnamechar(WCHAR ch)
1424 return (ch >= 'A' && ch <= 'Z') ||
1425 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1426 (ch == '-') || (ch == '.') ||
1427 (ch >= '0' && ch <= '9') ||
1428 (ch == 0xb7) ||
1429 (ch >= 0xc0 && ch <= 0xd6) ||
1430 (ch >= 0xd8 && ch <= 0xf6) ||
1431 (ch >= 0xf8 && ch <= 0x2ff) ||
1432 (ch >= 0x300 && ch <= 0x36f) ||
1433 (ch >= 0x370 && ch <= 0x37d) ||
1434 (ch >= 0x37f && ch <= 0x1fff) ||
1435 (ch >= 0x200c && ch <= 0x200d) ||
1436 (ch >= 0x203f && ch <= 0x2040) ||
1437 (ch >= 0x2070 && ch <= 0x218f) ||
1438 (ch >= 0x2c00 && ch <= 0x2fef) ||
1439 (ch >= 0x3001 && ch <= 0xd7ff) ||
1440 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1441 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1442 (ch >= 0xf900 && ch <= 0xfdcf) ||
1443 (ch >= 0xfdf0 && ch <= 0xfffd);
1446 static inline BOOL is_namechar(WCHAR ch)
1448 return (ch == ':') || is_ncnamechar(ch);
1451 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1453 /* When we're on attribute always return attribute type, container node type is kept.
1454 Note that container is not necessarily an element, and attribute doesn't mean it's
1455 an attribute in XML spec terms. */
1456 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1459 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1460 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1461 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1462 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1463 [5] Name ::= NameStartChar (NameChar)* */
1464 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1466 WCHAR *ptr;
1467 UINT start;
1469 if (reader->resume[XmlReadResume_Name])
1471 start = reader->resume[XmlReadResume_Name];
1472 ptr = reader_get_ptr(reader);
1474 else
1476 ptr = reader_get_ptr(reader);
1477 start = reader_get_cur(reader);
1478 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1481 while (is_namechar(*ptr))
1483 reader_skipn(reader, 1);
1484 ptr = reader_get_ptr(reader);
1487 if (is_reader_pending(reader))
1489 reader->resume[XmlReadResume_Name] = start;
1490 return E_PENDING;
1492 else
1493 reader->resume[XmlReadResume_Name] = 0;
1495 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1496 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1498 return S_OK;
1501 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1502 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1504 static const WCHAR xmlW[] = {'x','m','l'};
1505 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1506 strval name;
1507 WCHAR *ptr;
1508 HRESULT hr;
1509 UINT i;
1511 hr = reader_parse_name(reader, &name);
1512 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1514 /* now that we got name check for illegal content */
1515 if (strval_eq(reader, &name, &xmlval))
1516 return WC_E_LEADINGXML;
1518 /* PITarget can't be a qualified name */
1519 ptr = reader_get_strptr(reader, &name);
1520 for (i = 0; i < name.len; i++)
1521 if (ptr[i] == ':')
1522 return i ? NC_E_NAMECOLON : WC_E_PI;
1524 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1525 *target = name;
1526 return S_OK;
1529 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1530 static HRESULT reader_parse_pi(xmlreader *reader)
1532 strval target;
1533 WCHAR *ptr;
1534 UINT start;
1535 HRESULT hr;
1537 switch (reader->resumestate)
1539 case XmlReadResumeState_Initial:
1540 /* skip '<?' */
1541 reader_skipn(reader, 2);
1542 reader_shrink(reader);
1543 reader->resumestate = XmlReadResumeState_PITarget;
1544 case XmlReadResumeState_PITarget:
1545 hr = reader_parse_pitarget(reader, &target);
1546 if (FAILED(hr)) return hr;
1547 reader_set_strvalue(reader, StringValue_LocalName, &target);
1548 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1549 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1550 reader->resumestate = XmlReadResumeState_PIBody;
1551 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1552 default:
1556 start = reader->resume[XmlReadResume_Body];
1557 ptr = reader_get_ptr(reader);
1558 while (*ptr)
1560 if (ptr[0] == '?')
1562 if (ptr[1] == '>')
1564 UINT cur = reader_get_cur(reader);
1565 strval value;
1567 /* strip all leading whitespace chars */
1568 while (start < cur)
1570 ptr = reader_get_ptr2(reader, start);
1571 if (!is_wchar_space(*ptr)) break;
1572 start++;
1575 reader_init_strvalue(start, cur-start, &value);
1577 /* skip '?>' */
1578 reader_skipn(reader, 2);
1579 TRACE("%s\n", debug_strval(reader, &value));
1580 reader->nodetype = XmlNodeType_ProcessingInstruction;
1581 reader->resumestate = XmlReadResumeState_Initial;
1582 reader->resume[XmlReadResume_Body] = 0;
1583 reader_set_strvalue(reader, StringValue_Value, &value);
1584 return S_OK;
1588 reader_skipn(reader, 1);
1589 ptr = reader_get_ptr(reader);
1592 return S_OK;
1595 /* This one is used to parse significant whitespace nodes, like in Misc production */
1596 static HRESULT reader_parse_whitespace(xmlreader *reader)
1598 switch (reader->resumestate)
1600 case XmlReadResumeState_Initial:
1601 reader_shrink(reader);
1602 reader->resumestate = XmlReadResumeState_Whitespace;
1603 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1604 reader->nodetype = XmlNodeType_Whitespace;
1605 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1606 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1607 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1608 /* fallthrough */
1609 case XmlReadResumeState_Whitespace:
1611 strval value;
1612 UINT start;
1614 reader_skipspaces(reader);
1615 if (is_reader_pending(reader)) return S_OK;
1617 start = reader->resume[XmlReadResume_Body];
1618 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1619 reader_set_strvalue(reader, StringValue_Value, &value);
1620 TRACE("%s\n", debug_strval(reader, &value));
1621 reader->resumestate = XmlReadResumeState_Initial;
1623 default:
1627 return S_OK;
1630 /* [27] Misc ::= Comment | PI | S */
1631 static HRESULT reader_parse_misc(xmlreader *reader)
1633 HRESULT hr = S_FALSE;
1635 if (reader->resumestate != XmlReadResumeState_Initial)
1637 hr = reader_more(reader);
1638 if (FAILED(hr)) return hr;
1640 /* finish current node */
1641 switch (reader->resumestate)
1643 case XmlReadResumeState_PITarget:
1644 case XmlReadResumeState_PIBody:
1645 return reader_parse_pi(reader);
1646 case XmlReadResumeState_Comment:
1647 return reader_parse_comment(reader);
1648 case XmlReadResumeState_Whitespace:
1649 return reader_parse_whitespace(reader);
1650 default:
1651 ERR("unknown resume state %d\n", reader->resumestate);
1655 while (1)
1657 const WCHAR *cur = reader_get_ptr(reader);
1659 if (is_wchar_space(*cur))
1660 hr = reader_parse_whitespace(reader);
1661 else if (!reader_cmp(reader, commentW))
1662 hr = reader_parse_comment(reader);
1663 else if (!reader_cmp(reader, piW))
1664 hr = reader_parse_pi(reader);
1665 else
1666 break;
1668 if (hr != S_FALSE) return hr;
1671 return hr;
1674 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1675 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1677 WCHAR *cur = reader_get_ptr(reader), quote;
1678 UINT start;
1680 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1682 quote = *cur;
1683 reader_skipn(reader, 1);
1685 cur = reader_get_ptr(reader);
1686 start = reader_get_cur(reader);
1687 while (is_char(*cur) && *cur != quote)
1689 reader_skipn(reader, 1);
1690 cur = reader_get_ptr(reader);
1692 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1693 if (*cur == quote) reader_skipn(reader, 1);
1695 TRACE("%s\n", debug_strval(reader, literal));
1696 return S_OK;
1699 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1700 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1701 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1703 WCHAR *cur = reader_get_ptr(reader), quote;
1704 UINT start;
1706 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1708 quote = *cur;
1709 reader_skipn(reader, 1);
1711 start = reader_get_cur(reader);
1712 cur = reader_get_ptr(reader);
1713 while (is_pubchar(*cur) && *cur != quote)
1715 reader_skipn(reader, 1);
1716 cur = reader_get_ptr(reader);
1718 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1719 if (*cur == quote) reader_skipn(reader, 1);
1721 TRACE("%s\n", debug_strval(reader, literal));
1722 return S_OK;
1725 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1726 static HRESULT reader_parse_externalid(xmlreader *reader)
1728 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1729 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1730 strval name, sys;
1731 HRESULT hr;
1732 int cnt;
1734 if (!reader_cmp(reader, publicW)) {
1735 strval pub;
1737 /* public id */
1738 reader_skipn(reader, 6);
1739 cnt = reader_skipspaces(reader);
1740 if (!cnt) return WC_E_WHITESPACE;
1742 hr = reader_parse_pub_literal(reader, &pub);
1743 if (FAILED(hr)) return hr;
1745 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1746 hr = reader_add_attr(reader, NULL, &name, &pub);
1747 if (FAILED(hr)) return hr;
1749 cnt = reader_skipspaces(reader);
1750 if (!cnt) return S_OK;
1752 /* optional system id */
1753 hr = reader_parse_sys_literal(reader, &sys);
1754 if (FAILED(hr)) return S_OK;
1756 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1757 hr = reader_add_attr(reader, NULL, &name, &sys);
1758 if (FAILED(hr)) return hr;
1760 return S_OK;
1761 } else if (!reader_cmp(reader, systemW)) {
1762 /* system id */
1763 reader_skipn(reader, 6);
1764 cnt = reader_skipspaces(reader);
1765 if (!cnt) return WC_E_WHITESPACE;
1767 hr = reader_parse_sys_literal(reader, &sys);
1768 if (FAILED(hr)) return hr;
1770 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1771 return reader_add_attr(reader, NULL, &name, &sys);
1774 return S_FALSE;
1777 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1778 static HRESULT reader_parse_dtd(xmlreader *reader)
1780 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1781 strval name;
1782 WCHAR *cur;
1783 HRESULT hr;
1785 /* check if we have "<!DOCTYPE" */
1786 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1787 reader_shrink(reader);
1789 /* DTD processing is not allowed by default */
1790 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1792 reader_skipn(reader, 9);
1793 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1795 /* name */
1796 hr = reader_parse_name(reader, &name);
1797 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1799 reader_skipspaces(reader);
1801 hr = reader_parse_externalid(reader);
1802 if (FAILED(hr)) return hr;
1804 reader_skipspaces(reader);
1806 cur = reader_get_ptr(reader);
1807 if (*cur != '>')
1809 FIXME("internal subset parsing not implemented\n");
1810 return E_NOTIMPL;
1813 /* skip '>' */
1814 reader_skipn(reader, 1);
1816 reader->nodetype = XmlNodeType_DocumentType;
1817 reader_set_strvalue(reader, StringValue_LocalName, &name);
1818 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1820 return S_OK;
1823 /* [11 NS] LocalPart ::= NCName */
1824 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1826 WCHAR *ptr;
1827 UINT start;
1829 if (reader->resume[XmlReadResume_Local])
1831 start = reader->resume[XmlReadResume_Local];
1832 ptr = reader_get_ptr(reader);
1834 else
1836 ptr = reader_get_ptr(reader);
1837 start = reader_get_cur(reader);
1840 while (is_ncnamechar(*ptr))
1842 reader_skipn(reader, 1);
1843 ptr = reader_get_ptr(reader);
1846 if (is_reader_pending(reader))
1848 reader->resume[XmlReadResume_Local] = start;
1849 return E_PENDING;
1851 else
1852 reader->resume[XmlReadResume_Local] = 0;
1854 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1856 return S_OK;
1859 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1860 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1861 [9 NS] UnprefixedName ::= LocalPart
1862 [10 NS] Prefix ::= NCName */
1863 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1865 WCHAR *ptr;
1866 UINT start;
1867 HRESULT hr;
1869 if (reader->resume[XmlReadResume_Name])
1871 start = reader->resume[XmlReadResume_Name];
1872 ptr = reader_get_ptr(reader);
1874 else
1876 ptr = reader_get_ptr(reader);
1877 start = reader_get_cur(reader);
1878 reader->resume[XmlReadResume_Name] = start;
1879 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1882 if (reader->resume[XmlReadResume_Local])
1884 hr = reader_parse_local(reader, local);
1885 if (FAILED(hr)) return hr;
1887 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1888 local->start - reader->resume[XmlReadResume_Name] - 1,
1889 prefix);
1891 else
1893 /* skip prefix part */
1894 while (is_ncnamechar(*ptr))
1896 reader_skipn(reader, 1);
1897 ptr = reader_get_ptr(reader);
1900 if (is_reader_pending(reader)) return E_PENDING;
1902 /* got a qualified name */
1903 if (*ptr == ':')
1905 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1907 /* skip ':' */
1908 reader_skipn(reader, 1);
1909 hr = reader_parse_local(reader, local);
1910 if (FAILED(hr)) return hr;
1912 else
1914 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1915 reader_init_strvalue(0, 0, prefix);
1919 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1921 if (prefix->len)
1922 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1923 else
1924 TRACE("ncname %s\n", debug_strval(reader, local));
1926 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1927 /* count ':' too */
1928 (prefix->len ? prefix->len + 1 : 0) + local->len,
1929 qname);
1931 reader->resume[XmlReadResume_Name] = 0;
1932 reader->resume[XmlReadResume_Local] = 0;
1934 return S_OK;
1937 /* Applies normalization rules to a single char, used for attribute values.
1939 Rules include 2 steps:
1941 1) replacing \r\n with a single \n;
1942 2) replacing all whitespace chars with ' '.
1945 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1947 encoded_buffer *buffer = &reader->input->buffer->utf16;
1949 if (!is_wchar_space(*ptr)) return;
1951 if (*ptr == '\r' && *(ptr+1) == '\n')
1953 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1954 memmove(ptr+1, ptr+2, len);
1956 *ptr = ' ';
1959 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1961 static const WCHAR entltW[] = {'l','t'};
1962 static const WCHAR entgtW[] = {'g','t'};
1963 static const WCHAR entampW[] = {'a','m','p'};
1964 static const WCHAR entaposW[] = {'a','p','o','s'};
1965 static const WCHAR entquotW[] = {'q','u','o','t'};
1966 static const strval lt = { (WCHAR*)entltW, 2 };
1967 static const strval gt = { (WCHAR*)entgtW, 2 };
1968 static const strval amp = { (WCHAR*)entampW, 3 };
1969 static const strval apos = { (WCHAR*)entaposW, 4 };
1970 static const strval quot = { (WCHAR*)entquotW, 4 };
1971 WCHAR *str = reader_get_strptr(reader, name);
1973 switch (*str)
1975 case 'l':
1976 if (strval_eq(reader, name, &lt)) return '<';
1977 break;
1978 case 'g':
1979 if (strval_eq(reader, name, &gt)) return '>';
1980 break;
1981 case 'a':
1982 if (strval_eq(reader, name, &amp))
1983 return '&';
1984 else if (strval_eq(reader, name, &apos))
1985 return '\'';
1986 break;
1987 case 'q':
1988 if (strval_eq(reader, name, &quot)) return '\"';
1989 break;
1990 default:
1994 return 0;
1997 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1998 [67] Reference ::= EntityRef | CharRef
1999 [68] EntityRef ::= '&' Name ';' */
2000 static HRESULT reader_parse_reference(xmlreader *reader)
2002 encoded_buffer *buffer = &reader->input->buffer->utf16;
2003 WCHAR *start = reader_get_ptr(reader), *ptr;
2004 UINT cur = reader_get_cur(reader);
2005 WCHAR ch = 0;
2006 int len;
2008 /* skip '&' */
2009 reader_skipn(reader, 1);
2010 ptr = reader_get_ptr(reader);
2012 if (*ptr == '#')
2014 reader_skipn(reader, 1);
2015 ptr = reader_get_ptr(reader);
2017 /* hex char or decimal */
2018 if (*ptr == 'x')
2020 reader_skipn(reader, 1);
2021 ptr = reader_get_ptr(reader);
2023 while (*ptr != ';')
2025 if ((*ptr >= '0' && *ptr <= '9'))
2026 ch = ch*16 + *ptr - '0';
2027 else if ((*ptr >= 'a' && *ptr <= 'f'))
2028 ch = ch*16 + *ptr - 'a' + 10;
2029 else if ((*ptr >= 'A' && *ptr <= 'F'))
2030 ch = ch*16 + *ptr - 'A' + 10;
2031 else
2032 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2033 reader_skipn(reader, 1);
2034 ptr = reader_get_ptr(reader);
2037 else
2039 while (*ptr != ';')
2041 if ((*ptr >= '0' && *ptr <= '9'))
2043 ch = ch*10 + *ptr - '0';
2044 reader_skipn(reader, 1);
2045 ptr = reader_get_ptr(reader);
2047 else
2048 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2052 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2054 /* normalize */
2055 if (is_wchar_space(ch)) ch = ' ';
2057 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2058 memmove(start+1, ptr+1, len);
2059 buffer->cur = cur + 1;
2061 *start = ch;
2063 else
2065 strval name;
2066 HRESULT hr;
2068 hr = reader_parse_name(reader, &name);
2069 if (FAILED(hr)) return hr;
2071 ptr = reader_get_ptr(reader);
2072 if (*ptr != ';') return WC_E_SEMICOLON;
2074 /* predefined entities resolve to a single character */
2075 ch = get_predefined_entity(reader, &name);
2076 if (ch)
2078 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2079 memmove(start+1, ptr+1, len);
2080 buffer->cur = cur + 1;
2082 *start = ch;
2084 else
2086 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2087 return WC_E_UNDECLAREDENTITY;
2092 return S_OK;
2095 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2096 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2098 WCHAR *ptr, quote;
2099 UINT start;
2101 ptr = reader_get_ptr(reader);
2103 /* skip opening quote */
2104 quote = *ptr;
2105 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2106 reader_skipn(reader, 1);
2108 ptr = reader_get_ptr(reader);
2109 start = reader_get_cur(reader);
2110 while (*ptr)
2112 if (*ptr == '<') return WC_E_LESSTHAN;
2114 if (*ptr == quote)
2116 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2117 /* skip closing quote */
2118 reader_skipn(reader, 1);
2119 return S_OK;
2122 if (*ptr == '&')
2124 HRESULT hr = reader_parse_reference(reader);
2125 if (FAILED(hr)) return hr;
2127 else
2129 reader_normalize_space(reader, ptr);
2130 reader_skipn(reader, 1);
2132 ptr = reader_get_ptr(reader);
2135 return WC_E_QUOTE;
2138 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2139 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2140 [3 NS] DefaultAttName ::= 'xmlns'
2141 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2142 static HRESULT reader_parse_attribute(xmlreader *reader)
2144 strval prefix, local, qname, value;
2145 BOOL ns = FALSE, nsdef = FALSE;
2146 HRESULT hr;
2148 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2149 if (FAILED(hr)) return hr;
2151 if (strval_eq(reader, &prefix, &strval_xmlns))
2152 ns = TRUE;
2154 if (strval_eq(reader, &qname, &strval_xmlns))
2155 ns = nsdef = TRUE;
2157 hr = reader_parse_eq(reader);
2158 if (FAILED(hr)) return hr;
2160 hr = reader_parse_attvalue(reader, &value);
2161 if (FAILED(hr)) return hr;
2163 if (ns)
2164 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2166 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2167 return reader_add_attr(reader, &prefix, &local, &value);
2170 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2171 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2172 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2174 HRESULT hr;
2176 hr = reader_parse_qname(reader, prefix, local, qname);
2177 if (FAILED(hr)) return hr;
2179 while (1)
2181 static const WCHAR endW[] = {'/','>',0};
2183 reader_skipspaces(reader);
2185 /* empty element */
2186 if ((*empty = !reader_cmp(reader, endW)))
2188 /* skip '/>' */
2189 reader_skipn(reader, 2);
2190 reader->is_empty_element = TRUE;
2191 reader->empty_element.prefix = *prefix;
2192 reader->empty_element.localname = *local;
2193 reader->empty_element.qname = *qname;
2194 reader_mark_ns_nodes(reader, &reader->empty_element);
2195 return S_OK;
2198 /* got a start tag */
2199 if (!reader_cmp(reader, gtW))
2201 /* skip '>' */
2202 reader_skipn(reader, 1);
2203 return reader_push_element(reader, prefix, local, qname);
2206 hr = reader_parse_attribute(reader);
2207 if (FAILED(hr)) return hr;
2210 return S_OK;
2213 /* [39] element ::= EmptyElemTag | STag content ETag */
2214 static HRESULT reader_parse_element(xmlreader *reader)
2216 HRESULT hr;
2218 switch (reader->resumestate)
2220 case XmlReadResumeState_Initial:
2221 /* check if we are really on element */
2222 if (reader_cmp(reader, ltW)) return S_FALSE;
2224 /* skip '<' */
2225 reader_skipn(reader, 1);
2227 reader_shrink(reader);
2228 reader->resumestate = XmlReadResumeState_STag;
2229 case XmlReadResumeState_STag:
2231 strval qname, prefix, local;
2232 int empty = 0;
2234 /* this handles empty elements too */
2235 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2236 if (FAILED(hr)) return hr;
2238 /* FIXME: need to check for defined namespace to reject invalid prefix,
2239 currently reject all prefixes */
2240 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2242 /* if we got empty element and stack is empty go straight to Misc */
2243 if (empty && list_empty(&reader->elements))
2244 reader->instate = XmlReadInState_MiscEnd;
2245 else
2246 reader->instate = XmlReadInState_Content;
2248 reader->nodetype = XmlNodeType_Element;
2249 reader->resumestate = XmlReadResumeState_Initial;
2250 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2251 reader_set_strvalue(reader, StringValue_LocalName, &local);
2252 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2253 break;
2255 default:
2256 hr = E_FAIL;
2259 return hr;
2262 /* [13 NS] ETag ::= '</' QName S? '>' */
2263 static HRESULT reader_parse_endtag(xmlreader *reader)
2265 strval prefix, local, qname;
2266 struct element *elem;
2267 HRESULT hr;
2269 /* skip '</' */
2270 reader_skipn(reader, 2);
2272 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2273 if (FAILED(hr)) return hr;
2275 reader_skipspaces(reader);
2277 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2279 /* skip '>' */
2280 reader_skipn(reader, 1);
2282 /* Element stack should never be empty at this point, cause we shouldn't get to
2283 content parsing if it's empty. */
2284 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2285 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2287 reader->nodetype = XmlNodeType_EndElement;
2288 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2289 reader_set_strvalue(reader, StringValue_LocalName, &local);
2290 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2292 return S_OK;
2295 /* [18] CDSect ::= CDStart CData CDEnd
2296 [19] CDStart ::= '<![CDATA['
2297 [20] CData ::= (Char* - (Char* ']]>' Char*))
2298 [21] CDEnd ::= ']]>' */
2299 static HRESULT reader_parse_cdata(xmlreader *reader)
2301 WCHAR *ptr;
2302 UINT start;
2304 if (reader->resumestate == XmlReadResumeState_CDATA)
2306 start = reader->resume[XmlReadResume_Body];
2307 ptr = reader_get_ptr(reader);
2309 else
2311 /* skip markup '<![CDATA[' */
2312 reader_skipn(reader, 9);
2313 reader_shrink(reader);
2314 ptr = reader_get_ptr(reader);
2315 start = reader_get_cur(reader);
2316 reader->nodetype = XmlNodeType_CDATA;
2317 reader->resume[XmlReadResume_Body] = start;
2318 reader->resumestate = XmlReadResumeState_CDATA;
2319 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2320 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2321 reader_set_strvalue(reader, StringValue_Value, NULL);
2324 while (*ptr)
2326 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2328 strval value;
2330 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2332 /* skip ']]>' */
2333 reader_skipn(reader, 3);
2334 TRACE("%s\n", debug_strval(reader, &value));
2336 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2337 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2338 reader_set_strvalue(reader, StringValue_Value, &value);
2339 reader->resume[XmlReadResume_Body] = 0;
2340 reader->resumestate = XmlReadResumeState_Initial;
2341 return S_OK;
2343 else
2345 /* Value normalization is not fully implemented, rules are:
2347 - single '\r' -> '\n';
2348 - sequence '\r\n' -> '\n', in this case value length changes;
2350 if (*ptr == '\r') *ptr = '\n';
2351 reader_skipn(reader, 1);
2352 ptr++;
2356 return S_OK;
2359 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2360 static HRESULT reader_parse_chardata(xmlreader *reader)
2362 WCHAR *ptr;
2363 UINT start;
2365 if (reader->resumestate == XmlReadResumeState_CharData)
2367 start = reader->resume[XmlReadResume_Body];
2368 ptr = reader_get_ptr(reader);
2370 else
2372 reader_shrink(reader);
2373 ptr = reader_get_ptr(reader);
2374 start = reader_get_cur(reader);
2375 /* There's no text */
2376 if (!*ptr || *ptr == '<') return S_OK;
2377 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2378 reader->resume[XmlReadResume_Body] = start;
2379 reader->resumestate = XmlReadResumeState_CharData;
2380 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2381 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2382 reader_set_strvalue(reader, StringValue_Value, NULL);
2385 while (*ptr)
2387 /* CDATA closing sequence ']]>' is not allowed */
2388 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2389 return WC_E_CDSECTEND;
2391 /* Found next markup part */
2392 if (ptr[0] == '<')
2394 strval value;
2396 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2397 reader_set_strvalue(reader, StringValue_Value, &value);
2398 reader->resume[XmlReadResume_Body] = 0;
2399 reader->resumestate = XmlReadResumeState_Initial;
2400 return S_OK;
2403 reader_skipn(reader, 1);
2405 /* this covers a case when text has leading whitespace chars */
2406 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2407 ptr++;
2410 return S_OK;
2413 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2414 static HRESULT reader_parse_content(xmlreader *reader)
2416 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2417 static const WCHAR etagW[] = {'<','/',0};
2418 static const WCHAR ampW[] = {'&',0};
2420 if (reader->resumestate != XmlReadResumeState_Initial)
2422 switch (reader->resumestate)
2424 case XmlReadResumeState_CDATA:
2425 return reader_parse_cdata(reader);
2426 case XmlReadResumeState_Comment:
2427 return reader_parse_comment(reader);
2428 case XmlReadResumeState_PIBody:
2429 case XmlReadResumeState_PITarget:
2430 return reader_parse_pi(reader);
2431 case XmlReadResumeState_CharData:
2432 return reader_parse_chardata(reader);
2433 default:
2434 ERR("unknown resume state %d\n", reader->resumestate);
2438 reader_shrink(reader);
2440 /* handle end tag here, it indicates end of content as well */
2441 if (!reader_cmp(reader, etagW))
2442 return reader_parse_endtag(reader);
2444 if (!reader_cmp(reader, commentW))
2445 return reader_parse_comment(reader);
2447 if (!reader_cmp(reader, piW))
2448 return reader_parse_pi(reader);
2450 if (!reader_cmp(reader, cdstartW))
2451 return reader_parse_cdata(reader);
2453 if (!reader_cmp(reader, ampW))
2454 return reader_parse_reference(reader);
2456 if (!reader_cmp(reader, ltW))
2457 return reader_parse_element(reader);
2459 /* what's left must be CharData */
2460 return reader_parse_chardata(reader);
2463 static HRESULT reader_parse_nextnode(xmlreader *reader)
2465 XmlNodeType nodetype = reader_get_nodetype(reader);
2466 HRESULT hr;
2468 if (!is_reader_pending(reader))
2469 reader_clear_attrs(reader);
2471 /* When moving from EndElement or empty element, pop its own namespace defitions */
2472 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2473 reader_pop_ns_nodes(reader, &reader->empty_element);
2474 else if (nodetype == XmlNodeType_EndElement)
2475 reader_pop_element(reader);
2477 while (1)
2479 switch (reader->instate)
2481 /* if it's a first call for a new input we need to detect stream encoding */
2482 case XmlReadInState_Initial:
2484 xml_encoding enc;
2486 hr = readerinput_growraw(reader->input);
2487 if (FAILED(hr)) return hr;
2489 /* try to detect encoding by BOM or data and set input code page */
2490 hr = readerinput_detectencoding(reader->input, &enc);
2491 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2492 if (FAILED(hr)) return hr;
2494 /* always switch first time cause we have to put something in */
2495 readerinput_switchencoding(reader->input, enc);
2497 /* parse xml declaration */
2498 hr = reader_parse_xmldecl(reader);
2499 if (FAILED(hr)) return hr;
2501 readerinput_shrinkraw(reader->input, -1);
2502 reader->instate = XmlReadInState_Misc_DTD;
2503 if (hr == S_OK) return hr;
2505 break;
2506 case XmlReadInState_Misc_DTD:
2507 hr = reader_parse_misc(reader);
2508 if (FAILED(hr)) return hr;
2510 if (hr == S_FALSE)
2511 reader->instate = XmlReadInState_DTD;
2512 else
2513 return hr;
2514 break;
2515 case XmlReadInState_DTD:
2516 hr = reader_parse_dtd(reader);
2517 if (FAILED(hr)) return hr;
2519 if (hr == S_OK)
2521 reader->instate = XmlReadInState_DTD_Misc;
2522 return hr;
2524 else
2525 reader->instate = XmlReadInState_Element;
2526 break;
2527 case XmlReadInState_DTD_Misc:
2528 hr = reader_parse_misc(reader);
2529 if (FAILED(hr)) return hr;
2531 if (hr == S_FALSE)
2532 reader->instate = XmlReadInState_Element;
2533 else
2534 return hr;
2535 break;
2536 case XmlReadInState_Element:
2537 return reader_parse_element(reader);
2538 case XmlReadInState_Content:
2539 return reader_parse_content(reader);
2540 case XmlReadInState_MiscEnd:
2541 hr = reader_parse_misc(reader);
2542 if (FAILED(hr)) return hr;
2544 if (hr == S_FALSE)
2545 reader->instate = XmlReadInState_Eof;
2546 return hr;
2547 case XmlReadInState_Eof:
2548 return S_FALSE;
2549 default:
2550 FIXME("internal state %d not handled\n", reader->instate);
2551 return E_NOTIMPL;
2555 return E_NOTIMPL;
2558 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2560 xmlreader *This = impl_from_IXmlReader(iface);
2562 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2564 if (IsEqualGUID(riid, &IID_IUnknown) ||
2565 IsEqualGUID(riid, &IID_IXmlReader))
2567 *ppvObject = iface;
2569 else
2571 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2572 *ppvObject = NULL;
2573 return E_NOINTERFACE;
2576 IXmlReader_AddRef(iface);
2578 return S_OK;
2581 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2583 xmlreader *This = impl_from_IXmlReader(iface);
2584 ULONG ref = InterlockedIncrement(&This->ref);
2585 TRACE("(%p)->(%d)\n", This, ref);
2586 return ref;
2589 static void reader_clear_ns(xmlreader *reader)
2591 struct ns *ns, *ns2;
2593 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2594 reader_free_strvalued(reader, &ns->prefix);
2595 reader_free_strvalued(reader, &ns->uri);
2596 reader_free(reader, ns);
2599 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2600 reader_free_strvalued(reader, &ns->uri);
2601 reader_free(reader, ns);
2605 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2607 xmlreader *This = impl_from_IXmlReader(iface);
2608 LONG ref = InterlockedDecrement(&This->ref);
2610 TRACE("(%p)->(%d)\n", This, ref);
2612 if (ref == 0)
2614 IMalloc *imalloc = This->imalloc;
2615 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2616 if (This->resolver) IXmlResolver_Release(This->resolver);
2617 if (This->mlang) IUnknown_Release(This->mlang);
2618 reader_clear_attrs(This);
2619 reader_clear_ns(This);
2620 reader_clear_elements(This);
2621 reader_free_strvalues(This);
2622 reader_free(This, This);
2623 if (imalloc) IMalloc_Release(imalloc);
2626 return ref;
2629 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2631 xmlreader *This = impl_from_IXmlReader(iface);
2632 IXmlReaderInput *readerinput;
2633 HRESULT hr;
2635 TRACE("(%p)->(%p)\n", This, input);
2637 if (This->input)
2639 readerinput_release_stream(This->input);
2640 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2641 This->input = NULL;
2644 This->line = This->pos = 0;
2645 reader_clear_elements(This);
2646 This->depth = 0;
2647 This->resumestate = XmlReadResumeState_Initial;
2648 memset(This->resume, 0, sizeof(This->resume));
2650 /* just reset current input */
2651 if (!input)
2653 This->state = XmlReadState_Initial;
2654 return S_OK;
2657 /* now try IXmlReaderInput, ISequentialStream, IStream */
2658 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2659 if (hr == S_OK)
2661 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2662 This->input = impl_from_IXmlReaderInput(readerinput);
2663 else
2665 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2666 readerinput, readerinput->lpVtbl);
2667 IUnknown_Release(readerinput);
2668 return E_FAIL;
2673 if (hr != S_OK || !readerinput)
2675 /* create IXmlReaderInput basing on supplied interface */
2676 hr = CreateXmlReaderInputWithEncodingName(input,
2677 This->imalloc, NULL, FALSE, NULL, &readerinput);
2678 if (hr != S_OK) return hr;
2679 This->input = impl_from_IXmlReaderInput(readerinput);
2682 /* set stream for supplied IXmlReaderInput */
2683 hr = readerinput_query_for_stream(This->input);
2684 if (hr == S_OK)
2686 This->state = XmlReadState_Initial;
2687 This->instate = XmlReadInState_Initial;
2690 return hr;
2693 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2695 xmlreader *This = impl_from_IXmlReader(iface);
2697 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2699 if (!value) return E_INVALIDARG;
2701 switch (property)
2703 case XmlReaderProperty_MultiLanguage:
2704 *value = (LONG_PTR)This->mlang;
2705 if (This->mlang)
2706 IUnknown_AddRef(This->mlang);
2707 break;
2708 case XmlReaderProperty_XmlResolver:
2709 *value = (LONG_PTR)This->resolver;
2710 if (This->resolver)
2711 IXmlResolver_AddRef(This->resolver);
2712 break;
2713 case XmlReaderProperty_DtdProcessing:
2714 *value = This->dtdmode;
2715 break;
2716 case XmlReaderProperty_ReadState:
2717 *value = This->state;
2718 break;
2719 default:
2720 FIXME("Unimplemented property (%u)\n", property);
2721 return E_NOTIMPL;
2724 return S_OK;
2727 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2729 xmlreader *This = impl_from_IXmlReader(iface);
2731 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2733 switch (property)
2735 case XmlReaderProperty_MultiLanguage:
2736 if (This->mlang)
2737 IUnknown_Release(This->mlang);
2738 This->mlang = (IUnknown*)value;
2739 if (This->mlang)
2740 IUnknown_AddRef(This->mlang);
2741 if (This->mlang)
2742 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2743 break;
2744 case XmlReaderProperty_XmlResolver:
2745 if (This->resolver)
2746 IXmlResolver_Release(This->resolver);
2747 This->resolver = (IXmlResolver*)value;
2748 if (This->resolver)
2749 IXmlResolver_AddRef(This->resolver);
2750 break;
2751 case XmlReaderProperty_DtdProcessing:
2752 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2753 This->dtdmode = value;
2754 break;
2755 case XmlReaderProperty_MaxElementDepth:
2756 FIXME("Ignoring MaxElementDepth %ld\n", value);
2757 break;
2758 default:
2759 FIXME("Unimplemented property (%u)\n", property);
2760 return E_NOTIMPL;
2763 return S_OK;
2766 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2768 xmlreader *This = impl_from_IXmlReader(iface);
2769 XmlNodeType oldtype = This->nodetype;
2770 HRESULT hr;
2772 TRACE("(%p)->(%p)\n", This, nodetype);
2774 if (This->state == XmlReadState_Closed) return S_FALSE;
2776 hr = reader_parse_nextnode(This);
2777 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2778 This->state = XmlReadState_Interactive;
2779 if (hr == S_OK)
2781 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2782 *nodetype = This->nodetype;
2785 return hr;
2788 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2790 xmlreader *This = impl_from_IXmlReader(iface);
2791 TRACE("(%p)->(%p)\n", This, node_type);
2793 *node_type = reader_get_nodetype(This);
2794 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2797 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2799 if (!reader->attr_count)
2800 return S_FALSE;
2802 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2803 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2804 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2805 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2807 return S_OK;
2810 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2812 xmlreader *This = impl_from_IXmlReader(iface);
2814 TRACE("(%p)\n", This);
2816 return reader_move_to_first_attribute(This);
2819 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2821 xmlreader *This = impl_from_IXmlReader(iface);
2822 const struct list *next;
2824 TRACE("(%p)\n", This);
2826 if (!This->attr_count) return S_FALSE;
2828 if (!This->attr)
2829 return reader_move_to_first_attribute(This);
2831 next = list_next(&This->attrs, &This->attr->entry);
2832 if (next)
2834 This->attr = LIST_ENTRY(next, struct attribute, entry);
2835 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2836 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2837 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2840 return next ? S_OK : S_FALSE;
2843 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2844 LPCWSTR local_name,
2845 LPCWSTR namespaceUri)
2847 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2848 return E_NOTIMPL;
2851 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2853 xmlreader *This = impl_from_IXmlReader(iface);
2855 TRACE("(%p)\n", This);
2857 if (!This->attr_count) return S_FALSE;
2858 This->attr = NULL;
2860 /* FIXME: support other node types with 'attributes' like DTD */
2861 if (This->is_empty_element) {
2862 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2863 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2865 else {
2866 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2867 if (element) {
2868 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2869 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2873 return S_OK;
2876 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2878 xmlreader *This = impl_from_IXmlReader(iface);
2880 TRACE("(%p)->(%p %p)\n", This, name, len);
2881 *name = This->strvalues[StringValue_QualifiedName].str;
2882 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2883 return S_OK;
2886 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2888 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2889 struct ns *ns;
2891 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2892 if (strval_eq(reader, prefix, &ns->prefix))
2893 return ns;
2896 return NULL;
2899 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2901 if (list_empty(&reader->nsdef))
2902 return NULL;
2904 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2907 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2909 xmlreader *This = impl_from_IXmlReader(iface);
2910 const strval *prefix = &This->strvalues[StringValue_Prefix];
2911 XmlNodeType nodetype;
2912 struct ns *ns;
2913 UINT length;
2915 TRACE("(%p %p %p)\n", iface, uri, len);
2917 if (!len)
2918 len = &length;
2920 *uri = NULL;
2921 *len = 0;
2923 switch ((nodetype = reader_get_nodetype(This)))
2925 case XmlNodeType_Attribute:
2927 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2928 '2','0','0','0','/','x','m','l','n','s','/',0};
2929 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2930 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2931 const strval *local = &This->strvalues[StringValue_LocalName];
2933 /* check for reserved prefixes first */
2934 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2935 strval_eq(This, prefix, &strval_xmlns))
2937 *uri = xmlns_uriW;
2938 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2940 else if (strval_eq(This, prefix, &strval_xml)) {
2941 *uri = xml_uriW;
2942 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2945 if (!*uri) {
2946 ns = reader_lookup_ns(This, prefix);
2947 if (ns) {
2948 *uri = ns->uri.str;
2949 *len = ns->uri.len;
2951 else {
2952 *uri = emptyW;
2953 *len = 0;
2957 break;
2958 case XmlNodeType_Element:
2959 case XmlNodeType_EndElement:
2961 ns = reader_lookup_ns(This, prefix);
2963 /* pick top default ns if any */
2964 if (!ns)
2965 ns = reader_lookup_nsdef(This);
2967 if (ns) {
2968 *uri = ns->uri.str;
2969 *len = ns->uri.len;
2971 else {
2972 *uri = emptyW;
2973 *len = 0;
2976 break;
2977 default:
2978 FIXME("Unhandled node type %d\n", nodetype);
2979 return E_NOTIMPL;
2982 return S_OK;
2985 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2987 xmlreader *This = impl_from_IXmlReader(iface);
2989 TRACE("(%p)->(%p %p)\n", This, name, len);
2990 *name = This->strvalues[StringValue_LocalName].str;
2991 if (len) *len = This->strvalues[StringValue_LocalName].len;
2992 return S_OK;
2995 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2997 xmlreader *This = impl_from_IXmlReader(iface);
2999 TRACE("(%p)->(%p %p)\n", This, prefix, len);
3000 *prefix = This->strvalues[StringValue_Prefix].str;
3001 if (len) *len = This->strvalues[StringValue_Prefix].len;
3002 return S_OK;
3005 static BOOL is_namespace_definition(xmlreader *reader)
3007 const strval *local = &reader->strvalues[StringValue_LocalName];
3008 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3010 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3011 return FALSE;
3013 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3014 strval_eq(reader, prefix, &strval_xmlns));
3017 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3019 xmlreader *reader = impl_from_IXmlReader(iface);
3020 strval *val = &reader->strvalues[StringValue_Value];
3022 TRACE("(%p)->(%p %p)\n", reader, value, len);
3024 *value = NULL;
3026 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3028 XmlNodeType type;
3029 HRESULT hr;
3031 hr = IXmlReader_Read(iface, &type);
3032 if (FAILED(hr)) return hr;
3034 /* return if still pending, partially read values are not reported */
3035 if (is_reader_pending(reader)) return E_PENDING;
3038 if (!val->str)
3040 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3041 if (!ptr) return E_OUTOFMEMORY;
3042 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3043 ptr[val->len] = 0;
3044 val->str = ptr;
3047 /* For namespace definition attributes return values from namespace list */
3048 if (is_namespace_definition(reader)) {
3049 const strval *local = &reader->strvalues[StringValue_LocalName];
3050 struct ns *ns;
3052 ns = reader_lookup_ns(reader, local);
3053 if (!ns)
3054 ns = reader_lookup_nsdef(reader);
3056 val = &ns->uri;
3059 *value = val->str;
3060 if (len) *len = val->len;
3061 return S_OK;
3064 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3066 xmlreader *reader = impl_from_IXmlReader(iface);
3067 strval *val = &reader->strvalues[StringValue_Value];
3068 UINT len;
3070 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3072 /* Value is already allocated, chunked reads are not possible. */
3073 if (val->str) return S_FALSE;
3075 if (val->len)
3077 len = min(chunk_size, val->len);
3078 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3079 val->start += len;
3080 val->len -= len;
3081 if (read) *read = len;
3084 return S_OK;
3087 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3088 LPCWSTR *baseUri,
3089 UINT *baseUri_length)
3091 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3092 return E_NOTIMPL;
3095 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3097 FIXME("(%p): stub\n", iface);
3098 return FALSE;
3101 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3103 xmlreader *This = impl_from_IXmlReader(iface);
3104 TRACE("(%p)\n", This);
3105 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3106 when current node is start tag of an element */
3107 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3110 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3112 xmlreader *This = impl_from_IXmlReader(iface);
3114 TRACE("(%p %p)\n", This, lineNumber);
3116 if (!lineNumber) return E_INVALIDARG;
3118 *lineNumber = This->line;
3120 return S_OK;
3123 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3125 xmlreader *This = impl_from_IXmlReader(iface);
3127 TRACE("(%p %p)\n", This, linePosition);
3129 if (!linePosition) return E_INVALIDARG;
3131 *linePosition = This->pos;
3133 return S_OK;
3136 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3138 xmlreader *This = impl_from_IXmlReader(iface);
3140 TRACE("(%p)->(%p)\n", This, count);
3142 if (!count) return E_INVALIDARG;
3144 *count = This->attr_count;
3145 return S_OK;
3148 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3150 xmlreader *This = impl_from_IXmlReader(iface);
3151 TRACE("(%p)->(%p)\n", This, depth);
3152 *depth = This->depth;
3153 return S_OK;
3156 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3158 FIXME("(%p): stub\n", iface);
3159 return FALSE;
3162 static const struct IXmlReaderVtbl xmlreader_vtbl =
3164 xmlreader_QueryInterface,
3165 xmlreader_AddRef,
3166 xmlreader_Release,
3167 xmlreader_SetInput,
3168 xmlreader_GetProperty,
3169 xmlreader_SetProperty,
3170 xmlreader_Read,
3171 xmlreader_GetNodeType,
3172 xmlreader_MoveToFirstAttribute,
3173 xmlreader_MoveToNextAttribute,
3174 xmlreader_MoveToAttributeByName,
3175 xmlreader_MoveToElement,
3176 xmlreader_GetQualifiedName,
3177 xmlreader_GetNamespaceUri,
3178 xmlreader_GetLocalName,
3179 xmlreader_GetPrefix,
3180 xmlreader_GetValue,
3181 xmlreader_ReadValueChunk,
3182 xmlreader_GetBaseUri,
3183 xmlreader_IsDefault,
3184 xmlreader_IsEmptyElement,
3185 xmlreader_GetLineNumber,
3186 xmlreader_GetLinePosition,
3187 xmlreader_GetAttributeCount,
3188 xmlreader_GetDepth,
3189 xmlreader_IsEOF
3192 /** IXmlReaderInput **/
3193 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3195 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3197 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3199 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3200 IsEqualGUID(riid, &IID_IUnknown))
3202 *ppvObject = iface;
3204 else
3206 WARN("interface %s not implemented\n", debugstr_guid(riid));
3207 *ppvObject = NULL;
3208 return E_NOINTERFACE;
3211 IUnknown_AddRef(iface);
3213 return S_OK;
3216 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3218 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3219 ULONG ref = InterlockedIncrement(&This->ref);
3220 TRACE("(%p)->(%d)\n", This, ref);
3221 return ref;
3224 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3226 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3227 LONG ref = InterlockedDecrement(&This->ref);
3229 TRACE("(%p)->(%d)\n", This, ref);
3231 if (ref == 0)
3233 IMalloc *imalloc = This->imalloc;
3234 if (This->input) IUnknown_Release(This->input);
3235 if (This->stream) ISequentialStream_Release(This->stream);
3236 if (This->buffer) free_input_buffer(This->buffer);
3237 readerinput_free(This, This->baseuri);
3238 readerinput_free(This, This);
3239 if (imalloc) IMalloc_Release(imalloc);
3242 return ref;
3245 static const struct IUnknownVtbl xmlreaderinputvtbl =
3247 xmlreaderinput_QueryInterface,
3248 xmlreaderinput_AddRef,
3249 xmlreaderinput_Release
3252 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3254 xmlreader *reader;
3255 int i;
3257 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3259 if (!IsEqualGUID(riid, &IID_IXmlReader))
3261 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3262 return E_FAIL;
3265 if (imalloc)
3266 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3267 else
3268 reader = heap_alloc(sizeof(*reader));
3269 if(!reader) return E_OUTOFMEMORY;
3271 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3272 reader->ref = 1;
3273 reader->input = NULL;
3274 reader->state = XmlReadState_Closed;
3275 reader->instate = XmlReadInState_Initial;
3276 reader->resumestate = XmlReadResumeState_Initial;
3277 reader->dtdmode = DtdProcessing_Prohibit;
3278 reader->resolver = NULL;
3279 reader->mlang = NULL;
3280 reader->line = reader->pos = 0;
3281 reader->imalloc = imalloc;
3282 if (imalloc) IMalloc_AddRef(imalloc);
3283 reader->nodetype = XmlNodeType_None;
3284 list_init(&reader->attrs);
3285 reader->attr_count = 0;
3286 reader->attr = NULL;
3287 list_init(&reader->nsdef);
3288 list_init(&reader->ns);
3289 list_init(&reader->elements);
3290 reader->depth = 0;
3291 reader->max_depth = 256;
3292 reader->is_empty_element = FALSE;
3293 memset(reader->resume, 0, sizeof(reader->resume));
3295 for (i = 0; i < StringValue_Last; i++)
3296 reader->strvalues[i] = strval_empty;
3298 *obj = &reader->IXmlReader_iface;
3300 TRACE("returning iface %p\n", *obj);
3302 return S_OK;
3305 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3306 IMalloc *imalloc,
3307 LPCWSTR encoding,
3308 BOOL hint,
3309 LPCWSTR base_uri,
3310 IXmlReaderInput **ppInput)
3312 xmlreaderinput *readerinput;
3313 HRESULT hr;
3315 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3316 hint, wine_dbgstr_w(base_uri), ppInput);
3318 if (!stream || !ppInput) return E_INVALIDARG;
3320 if (imalloc)
3321 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3322 else
3323 readerinput = heap_alloc(sizeof(*readerinput));
3324 if(!readerinput) return E_OUTOFMEMORY;
3326 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3327 readerinput->ref = 1;
3328 readerinput->imalloc = imalloc;
3329 readerinput->stream = NULL;
3330 if (imalloc) IMalloc_AddRef(imalloc);
3331 readerinput->encoding = parse_encoding_name(encoding, -1);
3332 readerinput->hint = hint;
3333 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3334 readerinput->pending = 0;
3336 hr = alloc_input_buffer(readerinput);
3337 if (hr != S_OK)
3339 readerinput_free(readerinput, readerinput->baseuri);
3340 readerinput_free(readerinput, readerinput);
3341 if (imalloc) IMalloc_Release(imalloc);
3342 return hr;
3344 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3346 *ppInput = &readerinput->IXmlReaderInput_iface;
3348 TRACE("returning iface %p\n", *ppInput);
3350 return S_OK;