xmllite: Fix setting local name when parsing QName.
[wine.git] / dlls / xmllite / reader.c
blobef535e6ee69784ecda698f5387d88e104898cf84
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal reader states. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,     /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records where parsing was interrupted by an input problem.
   The reader uses this information to resume parsing. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Index of a saved offset used to resume from a particular input position. */
typedef enum
{
    XmlReadResume_Name = 0, /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local,    /* local for QName */
    XmlReadResume_Body,     /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last      /* number of slots, sizes the 'resume' array of the reader */
} XmlReaderResume;
/* Index into the per-node string values kept by the reader. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last            /* number of slots, sizes the 'strvalues' array of the reader */
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 xml_encoding get_encoding_from_codepage(UINT codepage)
165 int i;
166 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
168 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
170 return XmlEncoding_Unknown;
173 typedef struct
175 char *data;
176 UINT cur;
177 unsigned int allocated;
178 unsigned int written;
179 } encoded_buffer;
/* forward declaration, the structure is defined after xmlreaderinput */
typedef struct input_buffer input_buffer;
183 typedef struct
185 IXmlReaderInput IXmlReaderInput_iface;
186 LONG ref;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
188 IUnknown *input;
189 IMalloc *imalloc;
190 xml_encoding encoding;
191 BOOL hint;
192 WCHAR *baseuri;
193 /* stream reference set after SetInput() call from reader,
194 stored as sequential stream, cause currently
195 optimizations possible with IStream aren't implemented */
196 ISequentialStream *stream;
197 input_buffer *buffer;
198 unsigned int pending : 1;
199 } xmlreaderinput;
201 static const struct IUnknownVtbl xmlreaderinputvtbl;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
211 typedef struct
213 WCHAR *str; /* allocated null-terminated string */
214 UINT len; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start; /* input position where value starts */
216 } strval;
218 static WCHAR emptyW[] = {0};
219 static WCHAR xmlW[] = {'x','m','l',0};
220 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
221 static const strval strval_empty = { emptyW };
222 static const strval strval_xml = { xmlW, 3 };
223 static const strval strval_xmlns = { xmlnsW, 5 };
225 struct attribute
227 struct list entry;
228 strval prefix;
229 strval localname;
230 strval value;
233 struct element
235 struct list entry;
236 strval prefix;
237 strval localname;
238 strval qname;
241 struct ns
243 struct list entry;
244 strval prefix;
245 strval uri;
246 struct element *element;
249 typedef struct
251 IXmlReader IXmlReader_iface;
252 LONG ref;
253 xmlreaderinput *input;
254 IMalloc *imalloc;
255 XmlReadState state;
256 XmlReaderInternalState instate;
257 XmlReaderResumeState resumestate;
258 XmlNodeType nodetype;
259 DtdProcessing dtdmode;
260 IXmlResolver *resolver;
261 IUnknown *mlang;
262 UINT line, pos; /* reader position in XML stream */
263 struct list attrs; /* attributes list for current node */
264 struct attribute *attr; /* current attribute */
265 UINT attr_count;
266 struct list nsdef;
267 struct list ns;
268 struct list elements;
269 strval strvalues[StringValue_Last];
270 UINT depth;
271 UINT max_depth;
272 BOOL is_empty_element;
273 struct element empty_element;
274 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
275 } xmlreader;
277 struct input_buffer
279 encoded_buffer utf16;
280 encoded_buffer encoded;
281 UINT code_page;
282 xmlreaderinput *input;
285 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
287 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
290 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
292 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
295 /* reader memory allocation functions */
296 static inline void *reader_alloc(xmlreader *reader, size_t len)
298 return m_alloc(reader->imalloc, len);
301 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
303 void *ret = reader_alloc(reader, len);
304 if (ret)
305 memset(ret, 0, len);
306 return ret;
309 static inline void reader_free(xmlreader *reader, void *mem)
311 m_free(reader->imalloc, mem);
314 /* Just return pointer from offset, no attempt to read more. */
315 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
317 encoded_buffer *buffer = &reader->input->buffer->utf16;
318 return (WCHAR*)buffer->data + offset;
321 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
323 return v->str ? v->str : reader_get_ptr2(reader, v->start);
326 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
328 *dest = *src;
330 if (src->str != strval_empty.str)
332 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
333 if (!dest->str) return E_OUTOFMEMORY;
334 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
335 dest->str[dest->len] = 0;
336 dest->start = 0;
339 return S_OK;
342 /* reader input memory allocation functions */
343 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
345 return m_alloc(input->imalloc, len);
348 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
350 return m_realloc(input->imalloc, mem, len);
353 static inline void readerinput_free(xmlreaderinput *input, void *mem)
355 m_free(input->imalloc, mem);
358 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
360 LPWSTR ret = NULL;
362 if(str) {
363 DWORD size;
365 size = (strlenW(str)+1)*sizeof(WCHAR);
366 ret = readerinput_alloc(input, size);
367 if (ret) memcpy(ret, str, size);
370 return ret;
373 static void reader_clear_attrs(xmlreader *reader)
375 struct attribute *attr, *attr2;
376 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
378 reader_free(reader, attr);
380 list_init(&reader->attrs);
381 reader->attr_count = 0;
382 reader->attr = NULL;
385 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
386 while we are on a node with attributes */
387 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
389 struct attribute *attr;
391 attr = reader_alloc(reader, sizeof(*attr));
392 if (!attr) return E_OUTOFMEMORY;
394 if (prefix)
395 attr->prefix = *prefix;
396 else
397 memset(&attr->prefix, 0, sizeof(attr->prefix));
398 attr->localname = *localname;
399 attr->value = *value;
400 list_add_tail(&reader->attrs, &attr->entry);
401 reader->attr_count++;
403 return S_OK;
406 /* This one frees stored string value if needed */
407 static void reader_free_strvalued(xmlreader *reader, strval *v)
409 if (v->str != strval_empty.str)
411 reader_free(reader, v->str);
412 *v = strval_empty;
416 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
418 v->start = start;
419 v->len = len;
420 v->str = NULL;
423 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
425 return debugstr_wn(reader_get_strptr(reader, v), v->len);
428 /* used to initialize from constant string */
429 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
431 v->start = 0;
432 v->len = len;
433 v->str = str;
436 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
438 reader_free_strvalued(reader, &reader->strvalues[type]);
441 static void reader_free_strvalues(xmlreader *reader)
443 int type;
444 for (type = 0; type < StringValue_Last; type++)
445 reader_free_strvalue(reader, type);
448 /* This helper should only be used to test if strings are the same,
449 it doesn't try to sort. */
450 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
452 if (str1->len != str2->len) return 0;
453 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
456 static void reader_clear_elements(xmlreader *reader)
458 struct element *elem, *elem2;
459 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
461 reader_free_strvalued(reader, &elem->prefix);
462 reader_free_strvalued(reader, &elem->localname);
463 reader_free_strvalued(reader, &elem->qname);
464 reader_free(reader, elem);
466 list_init(&reader->elements);
467 reader->is_empty_element = FALSE;
470 static HRESULT reader_inc_depth(xmlreader *reader)
472 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
473 return S_OK;
476 static void reader_dec_depth(xmlreader *reader)
478 if (reader->depth > 1) reader->depth--;
481 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
483 struct ns *ns;
484 HRESULT hr;
486 ns = reader_alloc(reader, sizeof(*ns));
487 if (!ns) return E_OUTOFMEMORY;
489 if (def)
490 memset(&ns->prefix, 0, sizeof(ns->prefix));
491 else {
492 hr = reader_strvaldup(reader, prefix, &ns->prefix);
493 if (FAILED(hr)) {
494 reader_free(reader, ns);
495 return hr;
499 hr = reader_strvaldup(reader, uri, &ns->uri);
500 if (FAILED(hr)) {
501 reader_free_strvalued(reader, &ns->prefix);
502 reader_free(reader, ns);
503 return hr;
506 ns->element = NULL;
507 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
508 return hr;
511 static void reader_free_element(xmlreader *reader, struct element *element)
513 reader_free_strvalued(reader, &element->prefix);
514 reader_free_strvalued(reader, &element->localname);
515 reader_free_strvalued(reader, &element->qname);
516 reader_free(reader, element);
519 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
521 struct ns *ns;
523 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
524 if (ns->element)
525 break;
526 ns->element = element;
529 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
530 if (ns->element)
531 break;
532 ns->element = element;
536 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
537 strval *qname)
539 struct element *element;
540 HRESULT hr;
542 if (!list_empty(&reader->elements))
544 hr = reader_inc_depth(reader);
545 if (FAILED(hr))
546 return hr;
549 element = reader_alloc_zero(reader, sizeof(*element));
550 if (!element)
551 goto failed;
553 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
554 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
555 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
557 reader_free_element(reader, element);
558 goto failed;
561 list_add_head(&reader->elements, &element->entry);
562 reader_mark_ns_nodes(reader, element);
563 reader->is_empty_element = FALSE;
565 failed:
566 reader_dec_depth(reader);
567 return hr;
570 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
572 struct ns *ns, *ns2;
574 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
575 if (ns->element != element)
576 break;
578 list_remove(&ns->entry);
579 reader_free_strvalued(reader, &ns->prefix);
580 reader_free_strvalued(reader, &ns->uri);
581 reader_free(reader, ns);
584 if (!list_empty(&reader->nsdef)) {
585 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
586 if (ns->element == element) {
587 list_remove(&ns->entry);
588 reader_free_strvalued(reader, &ns->prefix);
589 reader_free_strvalued(reader, &ns->uri);
590 reader_free(reader, ns);
595 static void reader_pop_element(xmlreader *reader)
597 struct element *element;
599 if (list_empty(&reader->elements))
600 return;
602 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
603 list_remove(&element->entry);
605 reader_pop_ns_nodes(reader, element);
606 reader_free_element(reader, element);
607 reader_dec_depth(reader);
609 /* It was a root element, the rest is expected as Misc */
610 if (list_empty(&reader->elements))
611 reader->instate = XmlReadInState_MiscEnd;
614 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
615 means node value is to be determined. */
616 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
618 strval *v = &reader->strvalues[type];
620 reader_free_strvalue(reader, type);
621 if (!value)
623 v->str = NULL;
624 v->start = 0;
625 v->len = 0;
626 return;
629 if (value->str == strval_empty.str)
630 *v = *value;
631 else
633 if (type == StringValue_Value)
635 /* defer allocation for value string */
636 v->str = NULL;
637 v->start = value->start;
638 v->len = value->len;
640 else
642 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
643 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
644 v->str[value->len] = 0;
645 v->len = value->len;
650 static inline int is_reader_pending(xmlreader *reader)
652 return reader->input->pending;
655 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
657 const int initial_len = 0x2000;
658 buffer->data = readerinput_alloc(input, initial_len);
659 if (!buffer->data) return E_OUTOFMEMORY;
661 memset(buffer->data, 0, 4);
662 buffer->cur = 0;
663 buffer->allocated = initial_len;
664 buffer->written = 0;
666 return S_OK;
669 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
671 readerinput_free(input, buffer->data);
674 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
676 if (encoding == XmlEncoding_Unknown)
678 FIXME("unsupported encoding %d\n", encoding);
679 return E_NOTIMPL;
682 *cp = xml_encoding_map[encoding].cp;
684 return S_OK;
687 xml_encoding parse_encoding_name(const WCHAR *name, int len)
689 int min, max, n, c;
691 if (!name) return XmlEncoding_Unknown;
693 min = 0;
694 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
696 while (min <= max)
698 n = (min+max)/2;
700 if (len != -1)
701 c = strncmpiW(xml_encoding_map[n].name, name, len);
702 else
703 c = strcmpiW(xml_encoding_map[n].name, name);
704 if (!c)
705 return xml_encoding_map[n].enc;
707 if (c > 0)
708 max = n-1;
709 else
710 min = n+1;
713 return XmlEncoding_Unknown;
716 static HRESULT alloc_input_buffer(xmlreaderinput *input)
718 input_buffer *buffer;
719 HRESULT hr;
721 input->buffer = NULL;
723 buffer = readerinput_alloc(input, sizeof(*buffer));
724 if (!buffer) return E_OUTOFMEMORY;
726 buffer->input = input;
727 buffer->code_page = ~0; /* code page is unknown at this point */
728 hr = init_encoded_buffer(input, &buffer->utf16);
729 if (hr != S_OK) {
730 readerinput_free(input, buffer);
731 return hr;
734 hr = init_encoded_buffer(input, &buffer->encoded);
735 if (hr != S_OK) {
736 free_encoded_buffer(input, &buffer->utf16);
737 readerinput_free(input, buffer);
738 return hr;
741 input->buffer = buffer;
742 return S_OK;
745 static void free_input_buffer(input_buffer *buffer)
747 free_encoded_buffer(buffer->input, &buffer->encoded);
748 free_encoded_buffer(buffer->input, &buffer->utf16);
749 readerinput_free(buffer->input, buffer);
752 static void readerinput_release_stream(xmlreaderinput *readerinput)
754 if (readerinput->stream) {
755 ISequentialStream_Release(readerinput->stream);
756 readerinput->stream = NULL;
760 /* Queries already stored interface for IStream/ISequentialStream.
761 Interface supplied on creation will be overwritten */
762 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
764 HRESULT hr;
766 readerinput_release_stream(readerinput);
767 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
768 if (hr != S_OK)
769 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
771 return hr;
774 /* reads a chunk to raw buffer */
775 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
777 encoded_buffer *buffer = &readerinput->buffer->encoded;
778 /* to make sure aligned length won't exceed allocated length */
779 ULONG len = buffer->allocated - buffer->written - 4;
780 ULONG read;
781 HRESULT hr;
783 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
784 variable width encodings like UTF-8 */
785 len = (len + 3) & ~3;
786 /* try to use allocated space or grow */
787 if (buffer->allocated - buffer->written < len)
789 buffer->allocated *= 2;
790 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
791 len = buffer->allocated - buffer->written;
794 read = 0;
795 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
796 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
797 readerinput->pending = hr == E_PENDING;
798 if (FAILED(hr)) return hr;
799 buffer->written += read;
801 return hr;
804 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
805 static void readerinput_grow(xmlreaderinput *readerinput, int length)
807 encoded_buffer *buffer = &readerinput->buffer->utf16;
809 length *= sizeof(WCHAR);
810 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
811 if (buffer->allocated < buffer->written + length + 4)
813 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
814 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
815 buffer->allocated = grown_size;
819 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
821 static const char startA[] = {'<','?'};
822 static const char commentA[] = {'<','!'};
823 encoded_buffer *buffer = &readerinput->buffer->encoded;
824 unsigned char *ptr = (unsigned char*)buffer->data;
826 return !memcmp(buffer->data, startA, sizeof(startA)) ||
827 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
828 /* test start byte */
829 (ptr[0] == '<' &&
831 (ptr[1] && (ptr[1] <= 0x7f)) ||
832 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
833 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
834 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
838 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
840 encoded_buffer *buffer = &readerinput->buffer->encoded;
841 static const WCHAR startW[] = {'<','?'};
842 static const WCHAR commentW[] = {'<','!'};
843 static const char utf8bom[] = {0xef,0xbb,0xbf};
844 static const char utf16lebom[] = {0xff,0xfe};
846 *enc = XmlEncoding_Unknown;
848 if (buffer->written <= 3)
850 HRESULT hr = readerinput_growraw(readerinput);
851 if (FAILED(hr)) return hr;
852 if (buffer->written <= 3) return MX_E_INPUTEND;
855 /* try start symbols if we have enough data to do that, input buffer should contain
856 first chunk already */
857 if (readerinput_is_utf8(readerinput))
858 *enc = XmlEncoding_UTF8;
859 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
860 !memcmp(buffer->data, commentW, sizeof(commentW)))
861 *enc = XmlEncoding_UTF16;
862 /* try with BOM now */
863 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
865 buffer->cur += sizeof(utf8bom);
866 *enc = XmlEncoding_UTF8;
868 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
870 buffer->cur += sizeof(utf16lebom);
871 *enc = XmlEncoding_UTF16;
874 return S_OK;
877 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
879 encoded_buffer *buffer = &readerinput->buffer->encoded;
880 int len = buffer->written;
882 /* complete single byte char */
883 if (!(buffer->data[len-1] & 0x80)) return len;
885 /* find start byte of multibyte char */
886 while (--len && !(buffer->data[len] & 0xc0))
889 return len;
892 /* Returns byte length of complete char sequence for buffer code page,
893 it's relative to current buffer position which is currently used for BOM handling
894 only. */
895 static int readerinput_get_convlen(xmlreaderinput *readerinput)
897 encoded_buffer *buffer = &readerinput->buffer->encoded;
898 int len;
900 if (readerinput->buffer->code_page == CP_UTF8)
901 len = readerinput_get_utf8_convlen(readerinput);
902 else
903 len = buffer->written;
905 TRACE("%d\n", len - buffer->cur);
906 return len - buffer->cur;
909 /* It's possible that raw buffer has some leftovers from last conversion - some char
910 sequence that doesn't represent a full code point. Length argument should be calculated with
911 readerinput_get_convlen(), if it's -1 it will be calculated here. */
912 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
914 encoded_buffer *buffer = &readerinput->buffer->encoded;
916 if (len == -1)
917 len = readerinput_get_convlen(readerinput);
919 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
920 /* everything below cur is lost too */
921 buffer->written -= len + buffer->cur;
922 /* after this point we don't need cur offset really,
923 it's used only to mark where actual data begins when first chunk is read */
924 buffer->cur = 0;
927 /* note that raw buffer content is kept */
928 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
930 encoded_buffer *src = &readerinput->buffer->encoded;
931 encoded_buffer *dest = &readerinput->buffer->utf16;
932 int len, dest_len;
933 HRESULT hr;
934 WCHAR *ptr;
935 UINT cp;
937 hr = get_code_page(enc, &cp);
938 if (FAILED(hr)) return;
940 readerinput->buffer->code_page = cp;
941 len = readerinput_get_convlen(readerinput);
943 TRACE("switching to cp %d\n", cp);
945 /* just copy in this case */
946 if (enc == XmlEncoding_UTF16)
948 readerinput_grow(readerinput, len);
949 memcpy(dest->data, src->data + src->cur, len);
950 dest->written += len*sizeof(WCHAR);
951 return;
954 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
955 readerinput_grow(readerinput, dest_len);
956 ptr = (WCHAR*)dest->data;
957 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
958 ptr[dest_len] = 0;
959 dest->written += dest_len*sizeof(WCHAR);
962 /* shrinks parsed data a buffer begins with */
963 static void reader_shrink(xmlreader *reader)
965 encoded_buffer *buffer = &reader->input->buffer->utf16;
967 /* avoid to move too often using threshold shrink length */
968 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
970 buffer->written -= buffer->cur*sizeof(WCHAR);
971 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
972 buffer->cur = 0;
973 *(WCHAR*)&buffer->data[buffer->written] = 0;
977 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
978 It won't attempt to shrink but will grow destination buffer if needed */
979 static HRESULT reader_more(xmlreader *reader)
981 xmlreaderinput *readerinput = reader->input;
982 encoded_buffer *src = &readerinput->buffer->encoded;
983 encoded_buffer *dest = &readerinput->buffer->utf16;
984 UINT cp = readerinput->buffer->code_page;
985 int len, dest_len;
986 HRESULT hr;
987 WCHAR *ptr;
989 /* get some raw data from stream first */
990 hr = readerinput_growraw(readerinput);
991 len = readerinput_get_convlen(readerinput);
993 /* just copy for UTF-16 case */
994 if (cp == ~0)
996 readerinput_grow(readerinput, len);
997 memcpy(dest->data + dest->written, src->data + src->cur, len);
998 dest->written += len*sizeof(WCHAR);
999 return hr;
1002 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1003 readerinput_grow(readerinput, dest_len);
1004 ptr = (WCHAR*)(dest->data + dest->written);
1005 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1006 ptr[dest_len] = 0;
1007 dest->written += dest_len*sizeof(WCHAR);
1008 /* get rid of processed data */
1009 readerinput_shrinkraw(readerinput, len);
1011 return hr;
1014 static inline UINT reader_get_cur(xmlreader *reader)
1016 return reader->input->buffer->utf16.cur;
1019 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1021 encoded_buffer *buffer = &reader->input->buffer->utf16;
1022 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1023 if (!*ptr) reader_more(reader);
1024 return (WCHAR*)buffer->data + buffer->cur;
1027 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1029 int i=0;
1030 const WCHAR *ptr = reader_get_ptr(reader);
1031 while (str[i])
1033 if (!ptr[i])
1035 reader_more(reader);
1036 ptr = reader_get_ptr(reader);
1038 if (str[i] != ptr[i])
1039 return ptr[i] - str[i];
1040 i++;
1042 return 0;
1045 /* moves cursor n WCHARs forward */
1046 static void reader_skipn(xmlreader *reader, int n)
1048 encoded_buffer *buffer = &reader->input->buffer->utf16;
1049 const WCHAR *ptr = reader_get_ptr(reader);
1051 while (*ptr++ && n--)
1053 buffer->cur++;
1054 reader->pos++;
1058 static inline BOOL is_wchar_space(WCHAR ch)
1060 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1063 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1064 static int reader_skipspaces(xmlreader *reader)
1066 encoded_buffer *buffer = &reader->input->buffer->utf16;
1067 const WCHAR *ptr = reader_get_ptr(reader);
1068 UINT start = reader_get_cur(reader);
1070 while (is_wchar_space(*ptr))
1072 if (*ptr == '\r')
1073 reader->pos = 0;
1074 else if (*ptr == '\n')
1076 reader->line++;
1077 reader->pos = 0;
1079 else
1080 reader->pos++;
1082 buffer->cur++;
1083 ptr = reader_get_ptr(reader);
1086 return reader_get_cur(reader) - start;
1089 /* [26] VersionNum ::= '1.' [0-9]+ */
1090 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1092 static const WCHAR onedotW[] = {'1','.',0};
1093 WCHAR *ptr, *ptr2;
1094 UINT start;
1096 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1098 start = reader_get_cur(reader);
1099 /* skip "1." */
1100 reader_skipn(reader, 2);
1102 ptr2 = ptr = reader_get_ptr(reader);
1103 while (*ptr >= '0' && *ptr <= '9')
1105 reader_skipn(reader, 1);
1106 ptr = reader_get_ptr(reader);
1109 if (ptr2 == ptr) return WC_E_DIGIT;
1110 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1111 TRACE("version=%s\n", debug_strval(reader, val));
1112 return S_OK;
1115 /* [25] Eq ::= S? '=' S? */
1116 static HRESULT reader_parse_eq(xmlreader *reader)
1118 static const WCHAR eqW[] = {'=',0};
1119 reader_skipspaces(reader);
1120 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1121 /* skip '=' */
1122 reader_skipn(reader, 1);
1123 reader_skipspaces(reader);
1124 return S_OK;
1127 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1128 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1130 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1131 strval val, name;
1132 HRESULT hr;
1134 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1136 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1137 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1138 /* skip 'version' */
1139 reader_skipn(reader, 7);
1141 hr = reader_parse_eq(reader);
1142 if (FAILED(hr)) return hr;
1144 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1145 return WC_E_QUOTE;
1146 /* skip "'"|'"' */
1147 reader_skipn(reader, 1);
1149 hr = reader_parse_versionnum(reader, &val);
1150 if (FAILED(hr)) return hr;
1152 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1153 return WC_E_QUOTE;
1155 /* skip "'"|'"' */
1156 reader_skipn(reader, 1);
1158 return reader_add_attr(reader, NULL, &name, &val);
1161 /* ([A-Za-z0-9._] | '-') */
1162 static inline BOOL is_wchar_encname(WCHAR ch)
1164 return ((ch >= 'A' && ch <= 'Z') ||
1165 (ch >= 'a' && ch <= 'z') ||
1166 (ch >= '0' && ch <= '9') ||
1167 (ch == '.') || (ch == '_') ||
1168 (ch == '-'));
1171 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1172 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1174 WCHAR *start = reader_get_ptr(reader), *ptr;
1175 xml_encoding enc;
1176 int len;
1178 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1179 return WC_E_ENCNAME;
1181 val->start = reader_get_cur(reader);
1183 ptr = start;
1184 while (is_wchar_encname(*++ptr))
1187 len = ptr - start;
1188 enc = parse_encoding_name(start, len);
1189 TRACE("encoding name %s\n", debugstr_wn(start, len));
1190 val->str = start;
1191 val->len = len;
1193 if (enc == XmlEncoding_Unknown)
1194 return WC_E_ENCNAME;
1196 /* skip encoding name */
1197 reader_skipn(reader, len);
1198 return S_OK;
1201 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1202 static HRESULT reader_parse_encdecl(xmlreader *reader)
1204 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1205 strval name, val;
1206 HRESULT hr;
1208 if (!reader_skipspaces(reader)) return S_FALSE;
1210 if (reader_cmp(reader, encodingW)) return S_FALSE;
1211 name.str = reader_get_ptr(reader);
1212 name.start = reader_get_cur(reader);
1213 name.len = 8;
1214 /* skip 'encoding' */
1215 reader_skipn(reader, 8);
1217 hr = reader_parse_eq(reader);
1218 if (FAILED(hr)) return hr;
1220 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1221 return WC_E_QUOTE;
1222 /* skip "'"|'"' */
1223 reader_skipn(reader, 1);
1225 hr = reader_parse_encname(reader, &val);
1226 if (FAILED(hr)) return hr;
1228 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1229 return WC_E_QUOTE;
1231 /* skip "'"|'"' */
1232 reader_skipn(reader, 1);
1234 return reader_add_attr(reader, NULL, &name, &val);
1237 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1238 static HRESULT reader_parse_sddecl(xmlreader *reader)
1240 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1241 static const WCHAR yesW[] = {'y','e','s',0};
1242 static const WCHAR noW[] = {'n','o',0};
1243 strval name, val;
1244 UINT start;
1245 HRESULT hr;
1247 if (!reader_skipspaces(reader)) return S_FALSE;
1249 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1250 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1251 /* skip 'standalone' */
1252 reader_skipn(reader, 10);
1254 hr = reader_parse_eq(reader);
1255 if (FAILED(hr)) return hr;
1257 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1258 return WC_E_QUOTE;
1259 /* skip "'"|'"' */
1260 reader_skipn(reader, 1);
1262 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1263 return WC_E_XMLDECL;
1265 start = reader_get_cur(reader);
1266 /* skip 'yes'|'no' */
1267 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1268 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1269 TRACE("standalone=%s\n", debug_strval(reader, &val));
1271 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1272 return WC_E_QUOTE;
1273 /* skip "'"|'"' */
1274 reader_skipn(reader, 1);
1276 return reader_add_attr(reader, NULL, &name, &val);
1279 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1280 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1282 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1283 static const WCHAR declcloseW[] = {'?','>',0};
1284 HRESULT hr;
1286 /* check if we have "<?xml " */
1287 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1289 reader_skipn(reader, 5);
1290 hr = reader_parse_versioninfo(reader);
1291 if (FAILED(hr))
1292 return hr;
1294 hr = reader_parse_encdecl(reader);
1295 if (FAILED(hr))
1296 return hr;
1298 hr = reader_parse_sddecl(reader);
1299 if (FAILED(hr))
1300 return hr;
1302 reader_skipspaces(reader);
1303 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1304 reader_skipn(reader, 2);
1306 reader_inc_depth(reader);
1307 reader->nodetype = XmlNodeType_XmlDeclaration;
1308 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1309 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1310 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1312 return S_OK;
1315 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1316 static HRESULT reader_parse_comment(xmlreader *reader)
1318 WCHAR *ptr;
1319 UINT start;
1321 if (reader->resumestate == XmlReadResumeState_Comment)
1323 start = reader->resume[XmlReadResume_Body];
1324 ptr = reader_get_ptr(reader);
1326 else
1328 /* skip '<!--' */
1329 reader_skipn(reader, 4);
1330 reader_shrink(reader);
1331 ptr = reader_get_ptr(reader);
1332 start = reader_get_cur(reader);
1333 reader->nodetype = XmlNodeType_Comment;
1334 reader->resume[XmlReadResume_Body] = start;
1335 reader->resumestate = XmlReadResumeState_Comment;
1336 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1337 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1338 reader_set_strvalue(reader, StringValue_Value, NULL);
1341 /* will exit when there's no more data, it won't attempt to
1342 read more from stream */
1343 while (*ptr)
1345 if (ptr[0] == '-')
1347 if (ptr[1] == '-')
1349 if (ptr[2] == '>')
1351 strval value;
1353 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1354 TRACE("%s\n", debug_strval(reader, &value));
1356 /* skip rest of markup '->' */
1357 reader_skipn(reader, 3);
1359 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1360 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1361 reader_set_strvalue(reader, StringValue_Value, &value);
1362 reader->resume[XmlReadResume_Body] = 0;
1363 reader->resumestate = XmlReadResumeState_Initial;
1364 return S_OK;
1366 else
1367 return WC_E_COMMENT;
1371 reader_skipn(reader, 1);
1372 ptr++;
1375 return S_OK;
1378 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1379 static inline BOOL is_char(WCHAR ch)
1381 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1382 (ch >= 0x20 && ch <= 0xd7ff) ||
1383 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1384 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1385 (ch >= 0xe000 && ch <= 0xfffd);
1388 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1389 static inline BOOL is_pubchar(WCHAR ch)
1391 return (ch == ' ') ||
1392 (ch >= 'a' && ch <= 'z') ||
1393 (ch >= 'A' && ch <= 'Z') ||
1394 (ch >= '0' && ch <= '9') ||
1395 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1396 (ch == '=') || (ch == '?') ||
1397 (ch == '@') || (ch == '!') ||
1398 (ch >= '#' && ch <= '%') || /* #$% */
1399 (ch == '_') || (ch == '\r') || (ch == '\n');
1402 static inline BOOL is_namestartchar(WCHAR ch)
1404 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1405 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1406 (ch >= 0xc0 && ch <= 0xd6) ||
1407 (ch >= 0xd8 && ch <= 0xf6) ||
1408 (ch >= 0xf8 && ch <= 0x2ff) ||
1409 (ch >= 0x370 && ch <= 0x37d) ||
1410 (ch >= 0x37f && ch <= 0x1fff) ||
1411 (ch >= 0x200c && ch <= 0x200d) ||
1412 (ch >= 0x2070 && ch <= 0x218f) ||
1413 (ch >= 0x2c00 && ch <= 0x2fef) ||
1414 (ch >= 0x3001 && ch <= 0xd7ff) ||
1415 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1416 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1417 (ch >= 0xf900 && ch <= 0xfdcf) ||
1418 (ch >= 0xfdf0 && ch <= 0xfffd);
1421 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1422 static inline BOOL is_ncnamechar(WCHAR ch)
1424 return (ch >= 'A' && ch <= 'Z') ||
1425 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1426 (ch == '-') || (ch == '.') ||
1427 (ch >= '0' && ch <= '9') ||
1428 (ch == 0xb7) ||
1429 (ch >= 0xc0 && ch <= 0xd6) ||
1430 (ch >= 0xd8 && ch <= 0xf6) ||
1431 (ch >= 0xf8 && ch <= 0x2ff) ||
1432 (ch >= 0x300 && ch <= 0x36f) ||
1433 (ch >= 0x370 && ch <= 0x37d) ||
1434 (ch >= 0x37f && ch <= 0x1fff) ||
1435 (ch >= 0x200c && ch <= 0x200d) ||
1436 (ch >= 0x203f && ch <= 0x2040) ||
1437 (ch >= 0x2070 && ch <= 0x218f) ||
1438 (ch >= 0x2c00 && ch <= 0x2fef) ||
1439 (ch >= 0x3001 && ch <= 0xd7ff) ||
1440 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1441 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1442 (ch >= 0xf900 && ch <= 0xfdcf) ||
1443 (ch >= 0xfdf0 && ch <= 0xfffd);
1446 static inline BOOL is_namechar(WCHAR ch)
1448 return (ch == ':') || is_ncnamechar(ch);
1451 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1453 /* When we're on attribute always return attribute type, container node type is kept.
1454 Note that container is not necessarily an element, and attribute doesn't mean it's
1455 an attribute in XML spec terms. */
1456 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1459 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1460 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1461 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1462 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1463 [5] Name ::= NameStartChar (NameChar)* */
1464 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1466 WCHAR *ptr;
1467 UINT start;
1469 if (reader->resume[XmlReadResume_Name])
1471 start = reader->resume[XmlReadResume_Name];
1472 ptr = reader_get_ptr(reader);
1474 else
1476 ptr = reader_get_ptr(reader);
1477 start = reader_get_cur(reader);
1478 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1481 while (is_namechar(*ptr))
1483 reader_skipn(reader, 1);
1484 ptr = reader_get_ptr(reader);
1487 if (is_reader_pending(reader))
1489 reader->resume[XmlReadResume_Name] = start;
1490 return E_PENDING;
1492 else
1493 reader->resume[XmlReadResume_Name] = 0;
1495 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1496 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1498 return S_OK;
1501 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1502 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1504 static const WCHAR xmlW[] = {'x','m','l'};
1505 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1506 strval name;
1507 WCHAR *ptr;
1508 HRESULT hr;
1509 UINT i;
1511 hr = reader_parse_name(reader, &name);
1512 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1514 /* now that we got name check for illegal content */
1515 if (strval_eq(reader, &name, &xmlval))
1516 return WC_E_LEADINGXML;
1518 /* PITarget can't be a qualified name */
1519 ptr = reader_get_strptr(reader, &name);
1520 for (i = 0; i < name.len; i++)
1521 if (ptr[i] == ':')
1522 return i ? NC_E_NAMECOLON : WC_E_PI;
1524 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1525 *target = name;
1526 return S_OK;
1529 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1530 static HRESULT reader_parse_pi(xmlreader *reader)
1532 strval target;
1533 WCHAR *ptr;
1534 UINT start;
1535 HRESULT hr;
1537 switch (reader->resumestate)
1539 case XmlReadResumeState_Initial:
1540 /* skip '<?' */
1541 reader_skipn(reader, 2);
1542 reader_shrink(reader);
1543 reader->resumestate = XmlReadResumeState_PITarget;
1544 case XmlReadResumeState_PITarget:
1545 hr = reader_parse_pitarget(reader, &target);
1546 if (FAILED(hr)) return hr;
1547 reader_set_strvalue(reader, StringValue_LocalName, &target);
1548 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1549 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1550 reader->resumestate = XmlReadResumeState_PIBody;
1551 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1552 default:
1556 start = reader->resume[XmlReadResume_Body];
1557 ptr = reader_get_ptr(reader);
1558 while (*ptr)
1560 if (ptr[0] == '?')
1562 if (ptr[1] == '>')
1564 UINT cur = reader_get_cur(reader);
1565 strval value;
1567 /* strip all leading whitespace chars */
1568 while (start < cur)
1570 ptr = reader_get_ptr2(reader, start);
1571 if (!is_wchar_space(*ptr)) break;
1572 start++;
1575 reader_init_strvalue(start, cur-start, &value);
1577 /* skip '?>' */
1578 reader_skipn(reader, 2);
1579 TRACE("%s\n", debug_strval(reader, &value));
1580 reader->nodetype = XmlNodeType_ProcessingInstruction;
1581 reader->resumestate = XmlReadResumeState_Initial;
1582 reader->resume[XmlReadResume_Body] = 0;
1583 reader_set_strvalue(reader, StringValue_Value, &value);
1584 return S_OK;
1588 reader_skipn(reader, 1);
1589 ptr = reader_get_ptr(reader);
1592 return S_OK;
1595 /* This one is used to parse significant whitespace nodes, like in Misc production */
1596 static HRESULT reader_parse_whitespace(xmlreader *reader)
1598 switch (reader->resumestate)
1600 case XmlReadResumeState_Initial:
1601 reader_shrink(reader);
1602 reader->resumestate = XmlReadResumeState_Whitespace;
1603 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1604 reader->nodetype = XmlNodeType_Whitespace;
1605 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1606 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1607 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1608 /* fallthrough */
1609 case XmlReadResumeState_Whitespace:
1611 strval value;
1612 UINT start;
1614 reader_skipspaces(reader);
1615 if (is_reader_pending(reader)) return S_OK;
1617 start = reader->resume[XmlReadResume_Body];
1618 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1619 reader_set_strvalue(reader, StringValue_Value, &value);
1620 TRACE("%s\n", debug_strval(reader, &value));
1621 reader->resumestate = XmlReadResumeState_Initial;
1623 default:
1627 return S_OK;
1630 /* [27] Misc ::= Comment | PI | S */
1631 static HRESULT reader_parse_misc(xmlreader *reader)
1633 HRESULT hr = S_FALSE;
1635 if (reader->resumestate != XmlReadResumeState_Initial)
1637 hr = reader_more(reader);
1638 if (FAILED(hr)) return hr;
1640 /* finish current node */
1641 switch (reader->resumestate)
1643 case XmlReadResumeState_PITarget:
1644 case XmlReadResumeState_PIBody:
1645 return reader_parse_pi(reader);
1646 case XmlReadResumeState_Comment:
1647 return reader_parse_comment(reader);
1648 case XmlReadResumeState_Whitespace:
1649 return reader_parse_whitespace(reader);
1650 default:
1651 ERR("unknown resume state %d\n", reader->resumestate);
1655 while (1)
1657 const WCHAR *cur = reader_get_ptr(reader);
1659 if (is_wchar_space(*cur))
1660 hr = reader_parse_whitespace(reader);
1661 else if (!reader_cmp(reader, commentW))
1662 hr = reader_parse_comment(reader);
1663 else if (!reader_cmp(reader, piW))
1664 hr = reader_parse_pi(reader);
1665 else
1666 break;
1668 if (hr != S_FALSE) return hr;
1671 return hr;
1674 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1675 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1677 WCHAR *cur = reader_get_ptr(reader), quote;
1678 UINT start;
1680 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1682 quote = *cur;
1683 reader_skipn(reader, 1);
1685 cur = reader_get_ptr(reader);
1686 start = reader_get_cur(reader);
1687 while (is_char(*cur) && *cur != quote)
1689 reader_skipn(reader, 1);
1690 cur = reader_get_ptr(reader);
1692 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1693 if (*cur == quote) reader_skipn(reader, 1);
1695 TRACE("%s\n", debug_strval(reader, literal));
1696 return S_OK;
1699 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1700 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1701 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1703 WCHAR *cur = reader_get_ptr(reader), quote;
1704 UINT start;
1706 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1708 quote = *cur;
1709 reader_skipn(reader, 1);
1711 start = reader_get_cur(reader);
1712 cur = reader_get_ptr(reader);
1713 while (is_pubchar(*cur) && *cur != quote)
1715 reader_skipn(reader, 1);
1716 cur = reader_get_ptr(reader);
1718 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1719 if (*cur == quote) reader_skipn(reader, 1);
1721 TRACE("%s\n", debug_strval(reader, literal));
1722 return S_OK;
1725 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1726 static HRESULT reader_parse_externalid(xmlreader *reader)
1728 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1729 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1730 strval name, sys;
1731 HRESULT hr;
1732 int cnt;
1734 if (!reader_cmp(reader, publicW)) {
1735 strval pub;
1737 /* public id */
1738 reader_skipn(reader, 6);
1739 cnt = reader_skipspaces(reader);
1740 if (!cnt) return WC_E_WHITESPACE;
1742 hr = reader_parse_pub_literal(reader, &pub);
1743 if (FAILED(hr)) return hr;
1745 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1746 hr = reader_add_attr(reader, NULL, &name, &pub);
1747 if (FAILED(hr)) return hr;
1749 cnt = reader_skipspaces(reader);
1750 if (!cnt) return S_OK;
1752 /* optional system id */
1753 hr = reader_parse_sys_literal(reader, &sys);
1754 if (FAILED(hr)) return S_OK;
1756 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1757 hr = reader_add_attr(reader, NULL, &name, &sys);
1758 if (FAILED(hr)) return hr;
1760 return S_OK;
1761 } else if (!reader_cmp(reader, systemW)) {
1762 /* system id */
1763 reader_skipn(reader, 6);
1764 cnt = reader_skipspaces(reader);
1765 if (!cnt) return WC_E_WHITESPACE;
1767 hr = reader_parse_sys_literal(reader, &sys);
1768 if (FAILED(hr)) return hr;
1770 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1771 return reader_add_attr(reader, NULL, &name, &sys);
1774 return S_FALSE;
1777 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1778 static HRESULT reader_parse_dtd(xmlreader *reader)
1780 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1781 strval name;
1782 WCHAR *cur;
1783 HRESULT hr;
1785 /* check if we have "<!DOCTYPE" */
1786 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1787 reader_shrink(reader);
1789 /* DTD processing is not allowed by default */
1790 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1792 reader_skipn(reader, 9);
1793 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1795 /* name */
1796 hr = reader_parse_name(reader, &name);
1797 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1799 reader_skipspaces(reader);
1801 hr = reader_parse_externalid(reader);
1802 if (FAILED(hr)) return hr;
1804 reader_skipspaces(reader);
1806 cur = reader_get_ptr(reader);
1807 if (*cur != '>')
1809 FIXME("internal subset parsing not implemented\n");
1810 return E_NOTIMPL;
1813 /* skip '>' */
1814 reader_skipn(reader, 1);
1816 reader->nodetype = XmlNodeType_DocumentType;
1817 reader_set_strvalue(reader, StringValue_LocalName, &name);
1818 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1820 return S_OK;
1823 /* [11 NS] LocalPart ::= NCName */
1824 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1826 WCHAR *ptr;
1827 UINT start;
1829 if (reader->resume[XmlReadResume_Local])
1831 start = reader->resume[XmlReadResume_Local];
1832 ptr = reader_get_ptr(reader);
1834 else
1836 ptr = reader_get_ptr(reader);
1837 start = reader_get_cur(reader);
1840 while (is_ncnamechar(*ptr))
1842 reader_skipn(reader, 1);
1843 ptr = reader_get_ptr(reader);
1846 if (is_reader_pending(reader))
1848 reader->resume[XmlReadResume_Local] = start;
1849 return E_PENDING;
1851 else
1852 reader->resume[XmlReadResume_Local] = 0;
1854 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1856 return S_OK;
1859 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1860 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1861 [9 NS] UnprefixedName ::= LocalPart
1862 [10 NS] Prefix ::= NCName */
1863 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1865 WCHAR *ptr;
1866 UINT start;
1867 HRESULT hr;
1869 if (reader->resume[XmlReadResume_Name])
1871 start = reader->resume[XmlReadResume_Name];
1872 ptr = reader_get_ptr(reader);
1874 else
1876 ptr = reader_get_ptr(reader);
1877 start = reader_get_cur(reader);
1878 reader->resume[XmlReadResume_Name] = start;
1879 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1882 if (reader->resume[XmlReadResume_Local])
1884 hr = reader_parse_local(reader, local);
1885 if (FAILED(hr)) return hr;
1887 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1888 local->start - reader->resume[XmlReadResume_Name] - 1,
1889 prefix);
1891 else
1893 /* skip prefix part */
1894 while (is_ncnamechar(*ptr))
1896 reader_skipn(reader, 1);
1897 ptr = reader_get_ptr(reader);
1900 if (is_reader_pending(reader)) return E_PENDING;
1902 /* got a qualified name */
1903 if (*ptr == ':')
1905 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1907 /* skip ':' */
1908 reader_skipn(reader, 1);
1909 hr = reader_parse_local(reader, local);
1910 if (FAILED(hr)) return hr;
1912 else
1914 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1915 reader_init_strvalue(0, 0, prefix);
1919 if (prefix->len)
1920 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1921 else
1922 TRACE("ncname %s\n", debug_strval(reader, local));
1924 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1925 /* count ':' too */
1926 (prefix->len ? prefix->len + 1 : 0) + local->len,
1927 qname);
1929 reader->resume[XmlReadResume_Name] = 0;
1930 reader->resume[XmlReadResume_Local] = 0;
1932 return S_OK;
1935 /* Applies normalization rules to a single char, used for attribute values.
1937 Rules include 2 steps:
1939 1) replacing \r\n with a single \n;
1940 2) replacing all whitespace chars with ' '.
1943 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1945 encoded_buffer *buffer = &reader->input->buffer->utf16;
1947 if (!is_wchar_space(*ptr)) return;
1949 if (*ptr == '\r' && *(ptr+1) == '\n')
1951 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1952 memmove(ptr+1, ptr+2, len);
1954 *ptr = ' ';
1957 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1959 static const WCHAR entltW[] = {'l','t'};
1960 static const WCHAR entgtW[] = {'g','t'};
1961 static const WCHAR entampW[] = {'a','m','p'};
1962 static const WCHAR entaposW[] = {'a','p','o','s'};
1963 static const WCHAR entquotW[] = {'q','u','o','t'};
1964 static const strval lt = { (WCHAR*)entltW, 2 };
1965 static const strval gt = { (WCHAR*)entgtW, 2 };
1966 static const strval amp = { (WCHAR*)entampW, 3 };
1967 static const strval apos = { (WCHAR*)entaposW, 4 };
1968 static const strval quot = { (WCHAR*)entquotW, 4 };
1969 WCHAR *str = reader_get_strptr(reader, name);
1971 switch (*str)
1973 case 'l':
1974 if (strval_eq(reader, name, &lt)) return '<';
1975 break;
1976 case 'g':
1977 if (strval_eq(reader, name, &gt)) return '>';
1978 break;
1979 case 'a':
1980 if (strval_eq(reader, name, &amp))
1981 return '&';
1982 else if (strval_eq(reader, name, &apos))
1983 return '\'';
1984 break;
1985 case 'q':
1986 if (strval_eq(reader, name, &quot)) return '\"';
1987 break;
1988 default:
1992 return 0;
1995 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1996 [67] Reference ::= EntityRef | CharRef
1997 [68] EntityRef ::= '&' Name ';' */
1998 static HRESULT reader_parse_reference(xmlreader *reader)
2000 encoded_buffer *buffer = &reader->input->buffer->utf16;
2001 WCHAR *start = reader_get_ptr(reader), *ptr;
2002 UINT cur = reader_get_cur(reader);
2003 WCHAR ch = 0;
2004 int len;
2006 /* skip '&' */
2007 reader_skipn(reader, 1);
2008 ptr = reader_get_ptr(reader);
2010 if (*ptr == '#')
2012 reader_skipn(reader, 1);
2013 ptr = reader_get_ptr(reader);
2015 /* hex char or decimal */
2016 if (*ptr == 'x')
2018 reader_skipn(reader, 1);
2019 ptr = reader_get_ptr(reader);
2021 while (*ptr != ';')
2023 if ((*ptr >= '0' && *ptr <= '9'))
2024 ch = ch*16 + *ptr - '0';
2025 else if ((*ptr >= 'a' && *ptr <= 'f'))
2026 ch = ch*16 + *ptr - 'a' + 10;
2027 else if ((*ptr >= 'A' && *ptr <= 'F'))
2028 ch = ch*16 + *ptr - 'A' + 10;
2029 else
2030 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2031 reader_skipn(reader, 1);
2032 ptr = reader_get_ptr(reader);
2035 else
2037 while (*ptr != ';')
2039 if ((*ptr >= '0' && *ptr <= '9'))
2041 ch = ch*10 + *ptr - '0';
2042 reader_skipn(reader, 1);
2043 ptr = reader_get_ptr(reader);
2045 else
2046 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2050 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2052 /* normalize */
2053 if (is_wchar_space(ch)) ch = ' ';
2055 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2056 memmove(start+1, ptr+1, len);
2057 buffer->cur = cur + 1;
2059 *start = ch;
2061 else
2063 strval name;
2064 HRESULT hr;
2066 hr = reader_parse_name(reader, &name);
2067 if (FAILED(hr)) return hr;
2069 ptr = reader_get_ptr(reader);
2070 if (*ptr != ';') return WC_E_SEMICOLON;
2072 /* predefined entities resolve to a single character */
2073 ch = get_predefined_entity(reader, &name);
2074 if (ch)
2076 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2077 memmove(start+1, ptr+1, len);
2078 buffer->cur = cur + 1;
2080 *start = ch;
2082 else
2084 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2085 return WC_E_UNDECLAREDENTITY;
2090 return S_OK;
2093 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2094 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2096 WCHAR *ptr, quote;
2097 UINT start;
2099 ptr = reader_get_ptr(reader);
2101 /* skip opening quote */
2102 quote = *ptr;
2103 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2104 reader_skipn(reader, 1);
2106 ptr = reader_get_ptr(reader);
2107 start = reader_get_cur(reader);
2108 while (*ptr)
2110 if (*ptr == '<') return WC_E_LESSTHAN;
2112 if (*ptr == quote)
2114 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2115 /* skip closing quote */
2116 reader_skipn(reader, 1);
2117 return S_OK;
2120 if (*ptr == '&')
2122 HRESULT hr = reader_parse_reference(reader);
2123 if (FAILED(hr)) return hr;
2125 else
2127 reader_normalize_space(reader, ptr);
2128 reader_skipn(reader, 1);
2130 ptr = reader_get_ptr(reader);
2133 return WC_E_QUOTE;
2136 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2137 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2138 [3 NS] DefaultAttName ::= 'xmlns'
2139 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2140 static HRESULT reader_parse_attribute(xmlreader *reader)
2142 strval prefix, local, qname, value;
2143 BOOL ns = FALSE, nsdef = FALSE;
2144 HRESULT hr;
2146 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2147 if (FAILED(hr)) return hr;
2149 if (strval_eq(reader, &prefix, &strval_xmlns))
2150 ns = TRUE;
2152 if (strval_eq(reader, &qname, &strval_xmlns))
2153 ns = nsdef = TRUE;
2155 hr = reader_parse_eq(reader);
2156 if (FAILED(hr)) return hr;
2158 hr = reader_parse_attvalue(reader, &value);
2159 if (FAILED(hr)) return hr;
2161 if (ns)
2162 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2164 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2165 return reader_add_attr(reader, &prefix, &local, &value);
2168 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2169 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2170 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2172 HRESULT hr;
2174 hr = reader_parse_qname(reader, prefix, local, qname);
2175 if (FAILED(hr)) return hr;
2177 while (1)
2179 static const WCHAR endW[] = {'/','>',0};
2181 reader_skipspaces(reader);
2183 /* empty element */
2184 if ((*empty = !reader_cmp(reader, endW)))
2186 /* skip '/>' */
2187 reader_skipn(reader, 2);
2188 reader->is_empty_element = TRUE;
2189 reader->empty_element.prefix = *prefix;
2190 reader->empty_element.localname = *local;
2191 reader->empty_element.qname = *qname;
2192 reader_mark_ns_nodes(reader, &reader->empty_element);
2193 return S_OK;
2196 /* got a start tag */
2197 if (!reader_cmp(reader, gtW))
2199 /* skip '>' */
2200 reader_skipn(reader, 1);
2201 return reader_push_element(reader, prefix, local, qname);
2204 hr = reader_parse_attribute(reader);
2205 if (FAILED(hr)) return hr;
2208 return S_OK;
2211 /* [39] element ::= EmptyElemTag | STag content ETag */
2212 static HRESULT reader_parse_element(xmlreader *reader)
2214 HRESULT hr;
2216 switch (reader->resumestate)
2218 case XmlReadResumeState_Initial:
2219 /* check if we are really on element */
2220 if (reader_cmp(reader, ltW)) return S_FALSE;
2222 /* skip '<' */
2223 reader_skipn(reader, 1);
2225 reader_shrink(reader);
2226 reader->resumestate = XmlReadResumeState_STag;
2227 case XmlReadResumeState_STag:
2229 strval qname, prefix, local;
2230 int empty = 0;
2232 /* this handles empty elements too */
2233 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2234 if (FAILED(hr)) return hr;
2236 /* FIXME: need to check for defined namespace to reject invalid prefix,
2237 currently reject all prefixes */
2238 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2240 /* if we got empty element and stack is empty go straight to Misc */
2241 if (empty && list_empty(&reader->elements))
2242 reader->instate = XmlReadInState_MiscEnd;
2243 else
2244 reader->instate = XmlReadInState_Content;
2246 reader->nodetype = XmlNodeType_Element;
2247 reader->resumestate = XmlReadResumeState_Initial;
2248 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2249 reader_set_strvalue(reader, StringValue_LocalName, &local);
2250 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2251 break;
2253 default:
2254 hr = E_FAIL;
2257 return hr;
2260 /* [13 NS] ETag ::= '</' QName S? '>' */
2261 static HRESULT reader_parse_endtag(xmlreader *reader)
2263 strval prefix, local, qname;
2264 struct element *elem;
2265 HRESULT hr;
2267 /* skip '</' */
2268 reader_skipn(reader, 2);
2270 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2271 if (FAILED(hr)) return hr;
2273 reader_skipspaces(reader);
2275 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2277 /* skip '>' */
2278 reader_skipn(reader, 1);
2280 /* Element stack should never be empty at this point, cause we shouldn't get to
2281 content parsing if it's empty. */
2282 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2283 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2285 reader->nodetype = XmlNodeType_EndElement;
2286 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2287 reader_set_strvalue(reader, StringValue_LocalName, &local);
2288 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2290 return S_OK;
2293 /* [18] CDSect ::= CDStart CData CDEnd
2294 [19] CDStart ::= '<![CDATA['
2295 [20] CData ::= (Char* - (Char* ']]>' Char*))
2296 [21] CDEnd ::= ']]>' */
2297 static HRESULT reader_parse_cdata(xmlreader *reader)
2299 WCHAR *ptr;
2300 UINT start;
2302 if (reader->resumestate == XmlReadResumeState_CDATA)
2304 start = reader->resume[XmlReadResume_Body];
2305 ptr = reader_get_ptr(reader);
2307 else
2309 /* skip markup '<![CDATA[' */
2310 reader_skipn(reader, 9);
2311 reader_shrink(reader);
2312 ptr = reader_get_ptr(reader);
2313 start = reader_get_cur(reader);
2314 reader->nodetype = XmlNodeType_CDATA;
2315 reader->resume[XmlReadResume_Body] = start;
2316 reader->resumestate = XmlReadResumeState_CDATA;
2317 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2318 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2319 reader_set_strvalue(reader, StringValue_Value, NULL);
2322 while (*ptr)
2324 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2326 strval value;
2328 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2330 /* skip ']]>' */
2331 reader_skipn(reader, 3);
2332 TRACE("%s\n", debug_strval(reader, &value));
2334 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2335 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2336 reader_set_strvalue(reader, StringValue_Value, &value);
2337 reader->resume[XmlReadResume_Body] = 0;
2338 reader->resumestate = XmlReadResumeState_Initial;
2339 return S_OK;
2341 else
2343 /* Value normalization is not fully implemented, rules are:
2345 - single '\r' -> '\n';
2346 - sequence '\r\n' -> '\n', in this case value length changes;
2348 if (*ptr == '\r') *ptr = '\n';
2349 reader_skipn(reader, 1);
2350 ptr++;
2354 return S_OK;
2357 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2358 static HRESULT reader_parse_chardata(xmlreader *reader)
2360 WCHAR *ptr;
2361 UINT start;
2363 if (reader->resumestate == XmlReadResumeState_CharData)
2365 start = reader->resume[XmlReadResume_Body];
2366 ptr = reader_get_ptr(reader);
2368 else
2370 reader_shrink(reader);
2371 ptr = reader_get_ptr(reader);
2372 start = reader_get_cur(reader);
2373 /* There's no text */
2374 if (!*ptr || *ptr == '<') return S_OK;
2375 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2376 reader->resume[XmlReadResume_Body] = start;
2377 reader->resumestate = XmlReadResumeState_CharData;
2378 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2379 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2380 reader_set_strvalue(reader, StringValue_Value, NULL);
2383 while (*ptr)
2385 /* CDATA closing sequence ']]>' is not allowed */
2386 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2387 return WC_E_CDSECTEND;
2389 /* Found next markup part */
2390 if (ptr[0] == '<')
2392 strval value;
2394 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2395 reader_set_strvalue(reader, StringValue_Value, &value);
2396 reader->resume[XmlReadResume_Body] = 0;
2397 reader->resumestate = XmlReadResumeState_Initial;
2398 return S_OK;
2401 reader_skipn(reader, 1);
2403 /* this covers a case when text has leading whitespace chars */
2404 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2405 ptr++;
2408 return S_OK;
2411 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2412 static HRESULT reader_parse_content(xmlreader *reader)
2414 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2415 static const WCHAR etagW[] = {'<','/',0};
2416 static const WCHAR ampW[] = {'&',0};
2418 if (reader->resumestate != XmlReadResumeState_Initial)
2420 switch (reader->resumestate)
2422 case XmlReadResumeState_CDATA:
2423 return reader_parse_cdata(reader);
2424 case XmlReadResumeState_Comment:
2425 return reader_parse_comment(reader);
2426 case XmlReadResumeState_PIBody:
2427 case XmlReadResumeState_PITarget:
2428 return reader_parse_pi(reader);
2429 case XmlReadResumeState_CharData:
2430 return reader_parse_chardata(reader);
2431 default:
2432 ERR("unknown resume state %d\n", reader->resumestate);
2436 reader_shrink(reader);
2438 /* handle end tag here, it indicates end of content as well */
2439 if (!reader_cmp(reader, etagW))
2440 return reader_parse_endtag(reader);
2442 if (!reader_cmp(reader, commentW))
2443 return reader_parse_comment(reader);
2445 if (!reader_cmp(reader, piW))
2446 return reader_parse_pi(reader);
2448 if (!reader_cmp(reader, cdstartW))
2449 return reader_parse_cdata(reader);
2451 if (!reader_cmp(reader, ampW))
2452 return reader_parse_reference(reader);
2454 if (!reader_cmp(reader, ltW))
2455 return reader_parse_element(reader);
2457 /* what's left must be CharData */
2458 return reader_parse_chardata(reader);
2461 static HRESULT reader_parse_nextnode(xmlreader *reader)
2463 XmlNodeType nodetype = reader_get_nodetype(reader);
2464 HRESULT hr;
2466 if (!is_reader_pending(reader))
2467 reader_clear_attrs(reader);
2469 /* When moving from EndElement or empty element, pop its own namespace defitions */
2470 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2471 reader_pop_ns_nodes(reader, &reader->empty_element);
2472 else if (nodetype == XmlNodeType_EndElement)
2473 reader_pop_element(reader);
2475 while (1)
2477 switch (reader->instate)
2479 /* if it's a first call for a new input we need to detect stream encoding */
2480 case XmlReadInState_Initial:
2482 xml_encoding enc;
2484 hr = readerinput_growraw(reader->input);
2485 if (FAILED(hr)) return hr;
2487 /* try to detect encoding by BOM or data and set input code page */
2488 hr = readerinput_detectencoding(reader->input, &enc);
2489 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2490 if (FAILED(hr)) return hr;
2492 /* always switch first time cause we have to put something in */
2493 readerinput_switchencoding(reader->input, enc);
2495 /* parse xml declaration */
2496 hr = reader_parse_xmldecl(reader);
2497 if (FAILED(hr)) return hr;
2499 readerinput_shrinkraw(reader->input, -1);
2500 reader->instate = XmlReadInState_Misc_DTD;
2501 if (hr == S_OK) return hr;
2503 break;
2504 case XmlReadInState_Misc_DTD:
2505 hr = reader_parse_misc(reader);
2506 if (FAILED(hr)) return hr;
2508 if (hr == S_FALSE)
2509 reader->instate = XmlReadInState_DTD;
2510 else
2511 return hr;
2512 break;
2513 case XmlReadInState_DTD:
2514 hr = reader_parse_dtd(reader);
2515 if (FAILED(hr)) return hr;
2517 if (hr == S_OK)
2519 reader->instate = XmlReadInState_DTD_Misc;
2520 return hr;
2522 else
2523 reader->instate = XmlReadInState_Element;
2524 break;
2525 case XmlReadInState_DTD_Misc:
2526 hr = reader_parse_misc(reader);
2527 if (FAILED(hr)) return hr;
2529 if (hr == S_FALSE)
2530 reader->instate = XmlReadInState_Element;
2531 else
2532 return hr;
2533 break;
2534 case XmlReadInState_Element:
2535 return reader_parse_element(reader);
2536 case XmlReadInState_Content:
2537 return reader_parse_content(reader);
2538 case XmlReadInState_MiscEnd:
2539 hr = reader_parse_misc(reader);
2540 if (FAILED(hr)) return hr;
2542 if (hr == S_FALSE)
2543 reader->instate = XmlReadInState_Eof;
2544 return hr;
2545 case XmlReadInState_Eof:
2546 return S_FALSE;
2547 default:
2548 FIXME("internal state %d not handled\n", reader->instate);
2549 return E_NOTIMPL;
2553 return E_NOTIMPL;
2556 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2558 xmlreader *This = impl_from_IXmlReader(iface);
2560 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2562 if (IsEqualGUID(riid, &IID_IUnknown) ||
2563 IsEqualGUID(riid, &IID_IXmlReader))
2565 *ppvObject = iface;
2567 else
2569 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2570 *ppvObject = NULL;
2571 return E_NOINTERFACE;
2574 IXmlReader_AddRef(iface);
2576 return S_OK;
2579 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2581 xmlreader *This = impl_from_IXmlReader(iface);
2582 ULONG ref = InterlockedIncrement(&This->ref);
2583 TRACE("(%p)->(%d)\n", This, ref);
2584 return ref;
2587 static void reader_clear_ns(xmlreader *reader)
2589 struct ns *ns, *ns2;
2591 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2592 reader_free_strvalued(reader, &ns->prefix);
2593 reader_free_strvalued(reader, &ns->uri);
2594 reader_free(reader, ns);
2597 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2598 reader_free_strvalued(reader, &ns->uri);
2599 reader_free(reader, ns);
2603 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2605 xmlreader *This = impl_from_IXmlReader(iface);
2606 LONG ref = InterlockedDecrement(&This->ref);
2608 TRACE("(%p)->(%d)\n", This, ref);
2610 if (ref == 0)
2612 IMalloc *imalloc = This->imalloc;
2613 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2614 if (This->resolver) IXmlResolver_Release(This->resolver);
2615 if (This->mlang) IUnknown_Release(This->mlang);
2616 reader_clear_attrs(This);
2617 reader_clear_ns(This);
2618 reader_clear_elements(This);
2619 reader_free_strvalues(This);
2620 reader_free(This, This);
2621 if (imalloc) IMalloc_Release(imalloc);
2624 return ref;
2627 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2629 xmlreader *This = impl_from_IXmlReader(iface);
2630 IXmlReaderInput *readerinput;
2631 HRESULT hr;
2633 TRACE("(%p)->(%p)\n", This, input);
2635 if (This->input)
2637 readerinput_release_stream(This->input);
2638 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2639 This->input = NULL;
2642 This->line = This->pos = 0;
2643 reader_clear_elements(This);
2644 This->depth = 0;
2645 This->resumestate = XmlReadResumeState_Initial;
2646 memset(This->resume, 0, sizeof(This->resume));
2648 /* just reset current input */
2649 if (!input)
2651 This->state = XmlReadState_Initial;
2652 return S_OK;
2655 /* now try IXmlReaderInput, ISequentialStream, IStream */
2656 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2657 if (hr == S_OK)
2659 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2660 This->input = impl_from_IXmlReaderInput(readerinput);
2661 else
2663 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2664 readerinput, readerinput->lpVtbl);
2665 IUnknown_Release(readerinput);
2666 return E_FAIL;
2671 if (hr != S_OK || !readerinput)
2673 /* create IXmlReaderInput basing on supplied interface */
2674 hr = CreateXmlReaderInputWithEncodingName(input,
2675 This->imalloc, NULL, FALSE, NULL, &readerinput);
2676 if (hr != S_OK) return hr;
2677 This->input = impl_from_IXmlReaderInput(readerinput);
2680 /* set stream for supplied IXmlReaderInput */
2681 hr = readerinput_query_for_stream(This->input);
2682 if (hr == S_OK)
2684 This->state = XmlReadState_Initial;
2685 This->instate = XmlReadInState_Initial;
2688 return hr;
2691 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2693 xmlreader *This = impl_from_IXmlReader(iface);
2695 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2697 if (!value) return E_INVALIDARG;
2699 switch (property)
2701 case XmlReaderProperty_MultiLanguage:
2702 *value = (LONG_PTR)This->mlang;
2703 if (This->mlang)
2704 IUnknown_AddRef(This->mlang);
2705 break;
2706 case XmlReaderProperty_XmlResolver:
2707 *value = (LONG_PTR)This->resolver;
2708 if (This->resolver)
2709 IXmlResolver_AddRef(This->resolver);
2710 break;
2711 case XmlReaderProperty_DtdProcessing:
2712 *value = This->dtdmode;
2713 break;
2714 case XmlReaderProperty_ReadState:
2715 *value = This->state;
2716 break;
2717 default:
2718 FIXME("Unimplemented property (%u)\n", property);
2719 return E_NOTIMPL;
2722 return S_OK;
2725 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2727 xmlreader *This = impl_from_IXmlReader(iface);
2729 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2731 switch (property)
2733 case XmlReaderProperty_MultiLanguage:
2734 if (This->mlang)
2735 IUnknown_Release(This->mlang);
2736 This->mlang = (IUnknown*)value;
2737 if (This->mlang)
2738 IUnknown_AddRef(This->mlang);
2739 if (This->mlang)
2740 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2741 break;
2742 case XmlReaderProperty_XmlResolver:
2743 if (This->resolver)
2744 IXmlResolver_Release(This->resolver);
2745 This->resolver = (IXmlResolver*)value;
2746 if (This->resolver)
2747 IXmlResolver_AddRef(This->resolver);
2748 break;
2749 case XmlReaderProperty_DtdProcessing:
2750 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2751 This->dtdmode = value;
2752 break;
2753 case XmlReaderProperty_MaxElementDepth:
2754 FIXME("Ignoring MaxElementDepth %ld\n", value);
2755 break;
2756 default:
2757 FIXME("Unimplemented property (%u)\n", property);
2758 return E_NOTIMPL;
2761 return S_OK;
2764 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2766 xmlreader *This = impl_from_IXmlReader(iface);
2767 XmlNodeType oldtype = This->nodetype;
2768 HRESULT hr;
2770 TRACE("(%p)->(%p)\n", This, nodetype);
2772 if (This->state == XmlReadState_Closed) return S_FALSE;
2774 hr = reader_parse_nextnode(This);
2775 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2776 This->state = XmlReadState_Interactive;
2777 if (hr == S_OK)
2779 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2780 *nodetype = This->nodetype;
2783 return hr;
2786 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2788 xmlreader *This = impl_from_IXmlReader(iface);
2789 TRACE("(%p)->(%p)\n", This, node_type);
2791 *node_type = reader_get_nodetype(This);
2792 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2795 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2797 if (!reader->attr_count)
2798 return S_FALSE;
2800 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2801 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2802 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2803 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2805 return S_OK;
2808 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2810 xmlreader *This = impl_from_IXmlReader(iface);
2812 TRACE("(%p)\n", This);
2814 return reader_move_to_first_attribute(This);
2817 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2819 xmlreader *This = impl_from_IXmlReader(iface);
2820 const struct list *next;
2822 TRACE("(%p)\n", This);
2824 if (!This->attr_count) return S_FALSE;
2826 if (!This->attr)
2827 return reader_move_to_first_attribute(This);
2829 next = list_next(&This->attrs, &This->attr->entry);
2830 if (next)
2832 This->attr = LIST_ENTRY(next, struct attribute, entry);
2833 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2834 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2835 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2838 return next ? S_OK : S_FALSE;
/* IXmlReader::MoveToAttributeByName: not implemented yet. Logs a FIXME with
   the arguments and returns E_NOTIMPL without touching reader state. */
static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
                                                      LPCWSTR local_name,
                                                      LPCWSTR namespaceUri)
{
    FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
    return E_NOTIMPL;
}
2849 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2851 xmlreader *This = impl_from_IXmlReader(iface);
2853 TRACE("(%p)\n", This);
2855 if (!This->attr_count) return S_FALSE;
2856 This->attr = NULL;
2858 /* FIXME: support other node types with 'attributes' like DTD */
2859 if (This->is_empty_element) {
2860 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2861 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2863 else {
2864 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2865 if (element) {
2866 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2867 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2871 return S_OK;
2874 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2876 xmlreader *This = impl_from_IXmlReader(iface);
2878 TRACE("(%p)->(%p %p)\n", This, name, len);
2879 *name = This->strvalues[StringValue_QualifiedName].str;
2880 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2881 return S_OK;
2884 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2886 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2887 struct ns *ns;
2889 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2890 if (strval_eq(reader, prefix, &ns->prefix))
2891 return ns;
2894 return NULL;
2897 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2899 if (list_empty(&reader->nsdef))
2900 return NULL;
2902 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2905 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2907 xmlreader *This = impl_from_IXmlReader(iface);
2908 const strval *prefix = &This->strvalues[StringValue_Prefix];
2909 XmlNodeType nodetype;
2910 struct ns *ns;
2911 UINT length;
2913 TRACE("(%p %p %p)\n", iface, uri, len);
2915 if (!len)
2916 len = &length;
2918 *uri = NULL;
2919 *len = 0;
2921 switch ((nodetype = reader_get_nodetype(This)))
2923 case XmlNodeType_Attribute:
2925 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2926 '2','0','0','0','/','x','m','l','n','s','/',0};
2927 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2928 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2929 const strval *local = &This->strvalues[StringValue_LocalName];
2931 /* check for reserved prefixes first */
2932 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2933 strval_eq(This, prefix, &strval_xmlns))
2935 *uri = xmlns_uriW;
2936 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2938 else if (strval_eq(This, prefix, &strval_xml)) {
2939 *uri = xml_uriW;
2940 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2943 if (!*uri) {
2944 ns = reader_lookup_ns(This, prefix);
2945 if (ns) {
2946 *uri = ns->uri.str;
2947 *len = ns->uri.len;
2949 else {
2950 *uri = emptyW;
2951 *len = 0;
2955 break;
2956 case XmlNodeType_Element:
2957 case XmlNodeType_EndElement:
2959 ns = reader_lookup_ns(This, prefix);
2961 /* pick top default ns if any */
2962 if (!ns)
2963 ns = reader_lookup_nsdef(This);
2965 if (ns) {
2966 *uri = ns->uri.str;
2967 *len = ns->uri.len;
2969 else {
2970 *uri = emptyW;
2971 *len = 0;
2974 break;
2975 default:
2976 FIXME("Unhandled node type %d\n", nodetype);
2977 return E_NOTIMPL;
2980 return S_OK;
2983 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2985 xmlreader *This = impl_from_IXmlReader(iface);
2987 TRACE("(%p)->(%p %p)\n", This, name, len);
2988 *name = This->strvalues[StringValue_LocalName].str;
2989 if (len) *len = This->strvalues[StringValue_LocalName].len;
2990 return S_OK;
2993 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2995 xmlreader *This = impl_from_IXmlReader(iface);
2997 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2998 *prefix = This->strvalues[StringValue_Prefix].str;
2999 if (len) *len = This->strvalues[StringValue_Prefix].len;
3000 return S_OK;
3003 static BOOL is_namespace_definition(xmlreader *reader)
3005 const strval *local = &reader->strvalues[StringValue_LocalName];
3006 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3008 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3009 return FALSE;
3011 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3012 strval_eq(reader, prefix, &strval_xmlns));
3015 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3017 xmlreader *reader = impl_from_IXmlReader(iface);
3018 strval *val = &reader->strvalues[StringValue_Value];
3020 TRACE("(%p)->(%p %p)\n", reader, value, len);
3022 *value = NULL;
3024 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3026 XmlNodeType type;
3027 HRESULT hr;
3029 hr = IXmlReader_Read(iface, &type);
3030 if (FAILED(hr)) return hr;
3032 /* return if still pending, partially read values are not reported */
3033 if (is_reader_pending(reader)) return E_PENDING;
3036 if (!val->str)
3038 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3039 if (!ptr) return E_OUTOFMEMORY;
3040 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3041 ptr[val->len] = 0;
3042 val->str = ptr;
3045 /* For namespace definition attributes return values from namespace list */
3046 if (is_namespace_definition(reader)) {
3047 const strval *local = &reader->strvalues[StringValue_LocalName];
3048 struct ns *ns;
3050 ns = reader_lookup_ns(reader, local);
3051 if (!ns)
3052 ns = reader_lookup_nsdef(reader);
3054 val = &ns->uri;
3057 *value = val->str;
3058 if (len) *len = val->len;
3059 return S_OK;
3062 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3064 xmlreader *reader = impl_from_IXmlReader(iface);
3065 strval *val = &reader->strvalues[StringValue_Value];
3066 UINT len;
3068 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3070 /* Value is already allocated, chunked reads are not possible. */
3071 if (val->str) return S_FALSE;
3073 if (val->len)
3075 len = min(chunk_size, val->len);
3076 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3077 val->start += len;
3078 val->len -= len;
3079 if (read) *read = len;
3082 return S_OK;
/* IXmlReader::GetBaseUri: not implemented yet. Logs a FIXME with the
   arguments and returns E_NOTIMPL; the output arguments are not written. */
static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
                                           LPCWSTR *baseUri,
                                           UINT *baseUri_length)
{
    FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
    return E_NOTIMPL;
}
/* IXmlReader::IsDefault: not implemented yet. Logs a FIXME and always
   reports FALSE. */
static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
{
    FIXME("(%p): stub\n", iface);
    return FALSE;
}
3099 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3101 xmlreader *This = impl_from_IXmlReader(iface);
3102 TRACE("(%p)\n", This);
3103 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3104 when current node is start tag of an element */
3105 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3108 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3110 xmlreader *This = impl_from_IXmlReader(iface);
3112 TRACE("(%p %p)\n", This, lineNumber);
3114 if (!lineNumber) return E_INVALIDARG;
3116 *lineNumber = This->line;
3118 return S_OK;
3121 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3123 xmlreader *This = impl_from_IXmlReader(iface);
3125 TRACE("(%p %p)\n", This, linePosition);
3127 if (!linePosition) return E_INVALIDARG;
3129 *linePosition = This->pos;
3131 return S_OK;
3134 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3136 xmlreader *This = impl_from_IXmlReader(iface);
3138 TRACE("(%p)->(%p)\n", This, count);
3140 if (!count) return E_INVALIDARG;
3142 *count = This->attr_count;
3143 return S_OK;
3146 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3148 xmlreader *This = impl_from_IXmlReader(iface);
3149 TRACE("(%p)->(%p)\n", This, depth);
3150 *depth = This->depth;
3151 return S_OK;
/* IXmlReader::IsEOF: not implemented yet. Logs a FIXME and always reports
   FALSE, regardless of the reader's internal state. */
static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
{
    FIXME("(%p): stub\n", iface);
    return FALSE;
}
/* Method table for the IXmlReader implementation. Entries are positional,
   so their order must match the interface declaration. */
static const struct IXmlReaderVtbl xmlreader_vtbl =
{
    /* IUnknown */
    xmlreader_QueryInterface,
    xmlreader_AddRef,
    xmlreader_Release,
    /* IXmlReader */
    xmlreader_SetInput,
    xmlreader_GetProperty,
    xmlreader_SetProperty,
    xmlreader_Read,
    xmlreader_GetNodeType,
    xmlreader_MoveToFirstAttribute,
    xmlreader_MoveToNextAttribute,
    xmlreader_MoveToAttributeByName,
    xmlreader_MoveToElement,
    xmlreader_GetQualifiedName,
    xmlreader_GetNamespaceUri,
    xmlreader_GetLocalName,
    xmlreader_GetPrefix,
    xmlreader_GetValue,
    xmlreader_ReadValueChunk,
    xmlreader_GetBaseUri,
    xmlreader_IsDefault,
    xmlreader_IsEmptyElement,
    xmlreader_GetLineNumber,
    xmlreader_GetLinePosition,
    xmlreader_GetAttributeCount,
    xmlreader_GetDepth,
    xmlreader_IsEOF
};
3190 /** IXmlReaderInput **/
3191 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3193 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3195 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3197 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3198 IsEqualGUID(riid, &IID_IUnknown))
3200 *ppvObject = iface;
3202 else
3204 WARN("interface %s not implemented\n", debugstr_guid(riid));
3205 *ppvObject = NULL;
3206 return E_NOINTERFACE;
3209 IUnknown_AddRef(iface);
3211 return S_OK;
3214 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3216 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3217 ULONG ref = InterlockedIncrement(&This->ref);
3218 TRACE("(%p)->(%d)\n", This, ref);
3219 return ref;
3222 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3224 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3225 LONG ref = InterlockedDecrement(&This->ref);
3227 TRACE("(%p)->(%d)\n", This, ref);
3229 if (ref == 0)
3231 IMalloc *imalloc = This->imalloc;
3232 if (This->input) IUnknown_Release(This->input);
3233 if (This->stream) ISequentialStream_Release(This->stream);
3234 if (This->buffer) free_input_buffer(This->buffer);
3235 readerinput_free(This, This->baseuri);
3236 readerinput_free(This, This);
3237 if (imalloc) IMalloc_Release(imalloc);
3240 return ref;
/* Method table for the built-in IXmlReaderInput implementation, which only
   carries the IUnknown methods. xmlreader_SetInput() compares an input's
   vtable pointer against this table to recognize inputs created here. */
static const struct IUnknownVtbl xmlreaderinputvtbl =
{
    xmlreaderinput_QueryInterface,
    xmlreaderinput_AddRef,
    xmlreaderinput_Release
};
3250 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3252 xmlreader *reader;
3253 int i;
3255 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3257 if (!IsEqualGUID(riid, &IID_IXmlReader))
3259 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3260 return E_FAIL;
3263 if (imalloc)
3264 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3265 else
3266 reader = heap_alloc(sizeof(*reader));
3267 if(!reader) return E_OUTOFMEMORY;
3269 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3270 reader->ref = 1;
3271 reader->input = NULL;
3272 reader->state = XmlReadState_Closed;
3273 reader->instate = XmlReadInState_Initial;
3274 reader->resumestate = XmlReadResumeState_Initial;
3275 reader->dtdmode = DtdProcessing_Prohibit;
3276 reader->resolver = NULL;
3277 reader->mlang = NULL;
3278 reader->line = reader->pos = 0;
3279 reader->imalloc = imalloc;
3280 if (imalloc) IMalloc_AddRef(imalloc);
3281 reader->nodetype = XmlNodeType_None;
3282 list_init(&reader->attrs);
3283 reader->attr_count = 0;
3284 reader->attr = NULL;
3285 list_init(&reader->nsdef);
3286 list_init(&reader->ns);
3287 list_init(&reader->elements);
3288 reader->depth = 0;
3289 reader->max_depth = 256;
3290 reader->is_empty_element = FALSE;
3291 memset(reader->resume, 0, sizeof(reader->resume));
3293 for (i = 0; i < StringValue_Last; i++)
3294 reader->strvalues[i] = strval_empty;
3296 *obj = &reader->IXmlReader_iface;
3298 TRACE("returning iface %p\n", *obj);
3300 return S_OK;
3303 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3304 IMalloc *imalloc,
3305 LPCWSTR encoding,
3306 BOOL hint,
3307 LPCWSTR base_uri,
3308 IXmlReaderInput **ppInput)
3310 xmlreaderinput *readerinput;
3311 HRESULT hr;
3313 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3314 hint, wine_dbgstr_w(base_uri), ppInput);
3316 if (!stream || !ppInput) return E_INVALIDARG;
3318 if (imalloc)
3319 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3320 else
3321 readerinput = heap_alloc(sizeof(*readerinput));
3322 if(!readerinput) return E_OUTOFMEMORY;
3324 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3325 readerinput->ref = 1;
3326 readerinput->imalloc = imalloc;
3327 readerinput->stream = NULL;
3328 if (imalloc) IMalloc_AddRef(imalloc);
3329 readerinput->encoding = parse_encoding_name(encoding, -1);
3330 readerinput->hint = hint;
3331 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3332 readerinput->pending = 0;
3334 hr = alloc_input_buffer(readerinput);
3335 if (hr != S_OK)
3337 readerinput_free(readerinput, readerinput->baseuri);
3338 readerinput_free(readerinput, readerinput);
3339 if (imalloc) IMalloc_Release(imalloc);
3340 return hr;
3342 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3344 *ppInput = &readerinput->IXmlReaderInput_iface;
3346 TRACE("returning iface %p\n", *ppInput);
3348 return S_OK;