xmllite: Keep a list of namespace definitions.
[wine.git] / dlls / xmllite / reader.c
blob500db62bf226cf0dbf125253fa09c31e714ff07c
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal reader states, listed in the order the enumerators are declared. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records where parsing stopped because of an input problem, so the
   reader can pick up from the same construct on the next attempt. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices of saved input offsets used to resume from a particular position. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
/* Slots of the per-node string value array kept by the reader;
   StringValue_Last is the slot count. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 xml_encoding get_encoding_from_codepage(UINT codepage)
165 int i;
166 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
168 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
170 return XmlEncoding_Unknown;
173 typedef struct
175 char *data;
176 UINT cur;
177 unsigned int allocated;
178 unsigned int written;
179 } encoded_buffer;
181 typedef struct input_buffer input_buffer;
183 typedef struct
185 IXmlReaderInput IXmlReaderInput_iface;
186 LONG ref;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
188 IUnknown *input;
189 IMalloc *imalloc;
190 xml_encoding encoding;
191 BOOL hint;
192 WCHAR *baseuri;
193 /* stream reference set after SetInput() call from reader,
194 stored as sequential stream, cause currently
195 optimizations possible with IStream aren't implemented */
196 ISequentialStream *stream;
197 input_buffer *buffer;
198 unsigned int pending : 1;
199 } xmlreaderinput;
201 static const struct IUnknownVtbl xmlreaderinputvtbl;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
211 typedef struct
213 WCHAR *str; /* allocated null-terminated string */
214 UINT len; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start; /* input position where value starts */
216 } strval;
218 static WCHAR emptyW[] = {0};
219 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
220 static const strval strval_empty = { emptyW };
221 static const strval strval_xmlns = { xmlnsW, 5 };
223 struct attribute
225 struct list entry;
226 strval prefix;
227 strval localname;
228 strval value;
231 struct element
233 struct list entry;
234 strval prefix;
235 strval localname;
236 strval qname;
239 struct ns
241 struct list entry;
242 strval prefix;
243 strval uri;
244 struct element *element;
247 typedef struct
249 IXmlReader IXmlReader_iface;
250 LONG ref;
251 xmlreaderinput *input;
252 IMalloc *imalloc;
253 XmlReadState state;
254 XmlReaderInternalState instate;
255 XmlReaderResumeState resumestate;
256 XmlNodeType nodetype;
257 DtdProcessing dtdmode;
258 IXmlResolver *resolver;
259 IUnknown *mlang;
260 UINT line, pos; /* reader position in XML stream */
261 struct list attrs; /* attributes list for current node */
262 struct attribute *attr; /* current attribute */
263 UINT attr_count;
264 struct list nsdef;
265 struct list ns;
266 struct list elements;
267 strval strvalues[StringValue_Last];
268 UINT depth;
269 UINT max_depth;
270 BOOL is_empty_element;
271 struct element empty_element;
272 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
273 } xmlreader;
275 struct input_buffer
277 encoded_buffer utf16;
278 encoded_buffer encoded;
279 UINT code_page;
280 xmlreaderinput *input;
283 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
285 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
288 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
290 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
293 /* reader memory allocation functions */
294 static inline void *reader_alloc(xmlreader *reader, size_t len)
296 return m_alloc(reader->imalloc, len);
299 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
301 void *ret = reader_alloc(reader, len);
302 if (ret)
303 memset(ret, 0, len);
304 return ret;
307 static inline void reader_free(xmlreader *reader, void *mem)
309 m_free(reader->imalloc, mem);
312 /* Just return pointer from offset, no attempt to read more. */
313 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
315 encoded_buffer *buffer = &reader->input->buffer->utf16;
316 return (WCHAR*)buffer->data + offset;
319 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
321 return v->str ? v->str : reader_get_ptr2(reader, v->start);
324 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
326 *dest = *src;
328 if (src->str != strval_empty.str)
330 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
331 if (!dest->str) return E_OUTOFMEMORY;
332 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
333 dest->str[dest->len] = 0;
334 dest->start = 0;
337 return S_OK;
340 /* reader input memory allocation functions */
341 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
343 return m_alloc(input->imalloc, len);
346 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
348 return m_realloc(input->imalloc, mem, len);
351 static inline void readerinput_free(xmlreaderinput *input, void *mem)
353 m_free(input->imalloc, mem);
356 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
358 LPWSTR ret = NULL;
360 if(str) {
361 DWORD size;
363 size = (strlenW(str)+1)*sizeof(WCHAR);
364 ret = readerinput_alloc(input, size);
365 if (ret) memcpy(ret, str, size);
368 return ret;
371 static void reader_clear_attrs(xmlreader *reader)
373 struct attribute *attr, *attr2;
374 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
376 reader_free(reader, attr);
378 list_init(&reader->attrs);
379 reader->attr_count = 0;
380 reader->attr = NULL;
383 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
384 while we are on a node with attributes */
385 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
387 struct attribute *attr;
389 attr = reader_alloc(reader, sizeof(*attr));
390 if (!attr) return E_OUTOFMEMORY;
392 if (prefix)
393 attr->prefix = *prefix;
394 else
395 memset(&attr->prefix, 0, sizeof(attr->prefix));
396 attr->localname = *localname;
397 attr->value = *value;
398 list_add_tail(&reader->attrs, &attr->entry);
399 reader->attr_count++;
401 return S_OK;
404 /* This one frees stored string value if needed */
405 static void reader_free_strvalued(xmlreader *reader, strval *v)
407 if (v->str != strval_empty.str)
409 reader_free(reader, v->str);
410 *v = strval_empty;
414 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
416 v->start = start;
417 v->len = len;
418 v->str = NULL;
421 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
423 return debugstr_wn(reader_get_strptr(reader, v), v->len);
426 /* used to initialize from constant string */
427 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
429 v->start = 0;
430 v->len = len;
431 v->str = str;
434 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
436 reader_free_strvalued(reader, &reader->strvalues[type]);
439 static void reader_free_strvalues(xmlreader *reader)
441 int type;
442 for (type = 0; type < StringValue_Last; type++)
443 reader_free_strvalue(reader, type);
446 /* This helper should only be used to test if strings are the same,
447 it doesn't try to sort. */
448 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
450 if (str1->len != str2->len) return 0;
451 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
454 static void reader_clear_elements(xmlreader *reader)
456 struct element *elem, *elem2;
457 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
459 reader_free_strvalued(reader, &elem->qname);
460 reader_free(reader, elem);
462 list_init(&reader->elements);
463 reader->is_empty_element = FALSE;
466 static HRESULT reader_inc_depth(xmlreader *reader)
468 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
469 return S_OK;
472 static void reader_dec_depth(xmlreader *reader)
474 if (reader->depth > 1) reader->depth--;
477 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
479 struct ns *ns;
480 HRESULT hr;
482 ns = reader_alloc(reader, sizeof(*ns));
483 if (!ns) return E_OUTOFMEMORY;
485 if (def)
486 memset(&ns->prefix, 0, sizeof(ns->prefix));
487 else {
488 hr = reader_strvaldup(reader, prefix, &ns->prefix);
489 if (FAILED(hr)) {
490 reader_free(reader, ns);
491 return hr;
495 hr = reader_strvaldup(reader, uri, &ns->uri);
496 if (FAILED(hr)) {
497 reader_free_strvalued(reader, &ns->prefix);
498 reader_free(reader, ns);
499 return hr;
502 ns->element = NULL;
503 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
504 return hr;
507 static void reader_free_element(xmlreader *reader, struct element *element)
509 reader_free_strvalued(reader, &element->prefix);
510 reader_free_strvalued(reader, &element->localname);
511 reader_free_strvalued(reader, &element->qname);
512 reader_free(reader, element);
515 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
516 strval *qname)
518 struct element *element;
519 HRESULT hr;
521 if (!list_empty(&reader->elements))
523 hr = reader_inc_depth(reader);
524 if (FAILED(hr))
525 return hr;
528 element = reader_alloc_zero(reader, sizeof(*element));
529 if (!element)
530 goto failed;
532 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
533 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
534 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
536 reader_free_element(reader, element);
537 goto failed;
540 list_add_head(&reader->elements, &element->entry);
541 reader->is_empty_element = FALSE;
543 failed:
544 reader_dec_depth(reader);
545 return hr;
548 static void reader_pop_element(xmlreader *reader)
550 struct element *elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
552 if (elem)
554 list_remove(&elem->entry);
555 reader_free_strvalued(reader, &elem->qname);
556 reader_free_strvalued(reader, &elem->localname);
557 reader_free(reader, elem);
558 reader_dec_depth(reader);
562 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
563 means node value is to be determined. */
564 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
566 strval *v = &reader->strvalues[type];
568 reader_free_strvalue(reader, type);
569 if (!value)
571 v->str = NULL;
572 v->start = 0;
573 v->len = 0;
574 return;
577 if (value->str == strval_empty.str)
578 *v = *value;
579 else
581 if (type == StringValue_Value)
583 /* defer allocation for value string */
584 v->str = NULL;
585 v->start = value->start;
586 v->len = value->len;
588 else
590 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
591 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
592 v->str[value->len] = 0;
593 v->len = value->len;
598 static inline int is_reader_pending(xmlreader *reader)
600 return reader->input->pending;
603 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
605 const int initial_len = 0x2000;
606 buffer->data = readerinput_alloc(input, initial_len);
607 if (!buffer->data) return E_OUTOFMEMORY;
609 memset(buffer->data, 0, 4);
610 buffer->cur = 0;
611 buffer->allocated = initial_len;
612 buffer->written = 0;
614 return S_OK;
617 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
619 readerinput_free(input, buffer->data);
622 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
624 if (encoding == XmlEncoding_Unknown)
626 FIXME("unsupported encoding %d\n", encoding);
627 return E_NOTIMPL;
630 *cp = xml_encoding_map[encoding].cp;
632 return S_OK;
635 xml_encoding parse_encoding_name(const WCHAR *name, int len)
637 int min, max, n, c;
639 if (!name) return XmlEncoding_Unknown;
641 min = 0;
642 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
644 while (min <= max)
646 n = (min+max)/2;
648 if (len != -1)
649 c = strncmpiW(xml_encoding_map[n].name, name, len);
650 else
651 c = strcmpiW(xml_encoding_map[n].name, name);
652 if (!c)
653 return xml_encoding_map[n].enc;
655 if (c > 0)
656 max = n-1;
657 else
658 min = n+1;
661 return XmlEncoding_Unknown;
664 static HRESULT alloc_input_buffer(xmlreaderinput *input)
666 input_buffer *buffer;
667 HRESULT hr;
669 input->buffer = NULL;
671 buffer = readerinput_alloc(input, sizeof(*buffer));
672 if (!buffer) return E_OUTOFMEMORY;
674 buffer->input = input;
675 buffer->code_page = ~0; /* code page is unknown at this point */
676 hr = init_encoded_buffer(input, &buffer->utf16);
677 if (hr != S_OK) {
678 readerinput_free(input, buffer);
679 return hr;
682 hr = init_encoded_buffer(input, &buffer->encoded);
683 if (hr != S_OK) {
684 free_encoded_buffer(input, &buffer->utf16);
685 readerinput_free(input, buffer);
686 return hr;
689 input->buffer = buffer;
690 return S_OK;
693 static void free_input_buffer(input_buffer *buffer)
695 free_encoded_buffer(buffer->input, &buffer->encoded);
696 free_encoded_buffer(buffer->input, &buffer->utf16);
697 readerinput_free(buffer->input, buffer);
700 static void readerinput_release_stream(xmlreaderinput *readerinput)
702 if (readerinput->stream) {
703 ISequentialStream_Release(readerinput->stream);
704 readerinput->stream = NULL;
708 /* Queries already stored interface for IStream/ISequentialStream.
709 Interface supplied on creation will be overwritten */
710 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
712 HRESULT hr;
714 readerinput_release_stream(readerinput);
715 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
716 if (hr != S_OK)
717 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
719 return hr;
722 /* reads a chunk to raw buffer */
723 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
725 encoded_buffer *buffer = &readerinput->buffer->encoded;
726 /* to make sure aligned length won't exceed allocated length */
727 ULONG len = buffer->allocated - buffer->written - 4;
728 ULONG read;
729 HRESULT hr;
731 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
732 variable width encodings like UTF-8 */
733 len = (len + 3) & ~3;
734 /* try to use allocated space or grow */
735 if (buffer->allocated - buffer->written < len)
737 buffer->allocated *= 2;
738 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
739 len = buffer->allocated - buffer->written;
742 read = 0;
743 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
744 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
745 readerinput->pending = hr == E_PENDING;
746 if (FAILED(hr)) return hr;
747 buffer->written += read;
749 return hr;
752 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
753 static void readerinput_grow(xmlreaderinput *readerinput, int length)
755 encoded_buffer *buffer = &readerinput->buffer->utf16;
757 length *= sizeof(WCHAR);
758 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
759 if (buffer->allocated < buffer->written + length + 4)
761 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
762 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
763 buffer->allocated = grown_size;
767 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
769 static const char startA[] = {'<','?'};
770 static const char commentA[] = {'<','!'};
771 encoded_buffer *buffer = &readerinput->buffer->encoded;
772 unsigned char *ptr = (unsigned char*)buffer->data;
774 return !memcmp(buffer->data, startA, sizeof(startA)) ||
775 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
776 /* test start byte */
777 (ptr[0] == '<' &&
779 (ptr[1] && (ptr[1] <= 0x7f)) ||
780 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
781 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
782 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
786 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
788 encoded_buffer *buffer = &readerinput->buffer->encoded;
789 static const WCHAR startW[] = {'<','?'};
790 static const WCHAR commentW[] = {'<','!'};
791 static const char utf8bom[] = {0xef,0xbb,0xbf};
792 static const char utf16lebom[] = {0xff,0xfe};
794 *enc = XmlEncoding_Unknown;
796 if (buffer->written <= 3)
798 HRESULT hr = readerinput_growraw(readerinput);
799 if (FAILED(hr)) return hr;
800 if (buffer->written <= 3) return MX_E_INPUTEND;
803 /* try start symbols if we have enough data to do that, input buffer should contain
804 first chunk already */
805 if (readerinput_is_utf8(readerinput))
806 *enc = XmlEncoding_UTF8;
807 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
808 !memcmp(buffer->data, commentW, sizeof(commentW)))
809 *enc = XmlEncoding_UTF16;
810 /* try with BOM now */
811 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
813 buffer->cur += sizeof(utf8bom);
814 *enc = XmlEncoding_UTF8;
816 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
818 buffer->cur += sizeof(utf16lebom);
819 *enc = XmlEncoding_UTF16;
822 return S_OK;
825 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
827 encoded_buffer *buffer = &readerinput->buffer->encoded;
828 int len = buffer->written;
830 /* complete single byte char */
831 if (!(buffer->data[len-1] & 0x80)) return len;
833 /* find start byte of multibyte char */
834 while (--len && !(buffer->data[len] & 0xc0))
837 return len;
840 /* Returns byte length of complete char sequence for buffer code page,
841 it's relative to current buffer position which is currently used for BOM handling
842 only. */
843 static int readerinput_get_convlen(xmlreaderinput *readerinput)
845 encoded_buffer *buffer = &readerinput->buffer->encoded;
846 int len;
848 if (readerinput->buffer->code_page == CP_UTF8)
849 len = readerinput_get_utf8_convlen(readerinput);
850 else
851 len = buffer->written;
853 TRACE("%d\n", len - buffer->cur);
854 return len - buffer->cur;
857 /* It's possible that raw buffer has some leftovers from last conversion - some char
858 sequence that doesn't represent a full code point. Length argument should be calculated with
859 readerinput_get_convlen(), if it's -1 it will be calculated here. */
860 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
862 encoded_buffer *buffer = &readerinput->buffer->encoded;
864 if (len == -1)
865 len = readerinput_get_convlen(readerinput);
867 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
868 /* everything below cur is lost too */
869 buffer->written -= len + buffer->cur;
870 /* after this point we don't need cur offset really,
871 it's used only to mark where actual data begins when first chunk is read */
872 buffer->cur = 0;
875 /* note that raw buffer content is kept */
876 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
878 encoded_buffer *src = &readerinput->buffer->encoded;
879 encoded_buffer *dest = &readerinput->buffer->utf16;
880 int len, dest_len;
881 HRESULT hr;
882 WCHAR *ptr;
883 UINT cp;
885 hr = get_code_page(enc, &cp);
886 if (FAILED(hr)) return;
888 readerinput->buffer->code_page = cp;
889 len = readerinput_get_convlen(readerinput);
891 TRACE("switching to cp %d\n", cp);
893 /* just copy in this case */
894 if (enc == XmlEncoding_UTF16)
896 readerinput_grow(readerinput, len);
897 memcpy(dest->data, src->data + src->cur, len);
898 dest->written += len*sizeof(WCHAR);
899 return;
902 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
903 readerinput_grow(readerinput, dest_len);
904 ptr = (WCHAR*)dest->data;
905 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
906 ptr[dest_len] = 0;
907 dest->written += dest_len*sizeof(WCHAR);
910 /* shrinks parsed data a buffer begins with */
911 static void reader_shrink(xmlreader *reader)
913 encoded_buffer *buffer = &reader->input->buffer->utf16;
915 /* avoid to move too often using threshold shrink length */
916 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
918 buffer->written -= buffer->cur*sizeof(WCHAR);
919 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
920 buffer->cur = 0;
921 *(WCHAR*)&buffer->data[buffer->written] = 0;
925 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
926 It won't attempt to shrink but will grow destination buffer if needed */
927 static HRESULT reader_more(xmlreader *reader)
929 xmlreaderinput *readerinput = reader->input;
930 encoded_buffer *src = &readerinput->buffer->encoded;
931 encoded_buffer *dest = &readerinput->buffer->utf16;
932 UINT cp = readerinput->buffer->code_page;
933 int len, dest_len;
934 HRESULT hr;
935 WCHAR *ptr;
937 /* get some raw data from stream first */
938 hr = readerinput_growraw(readerinput);
939 len = readerinput_get_convlen(readerinput);
941 /* just copy for UTF-16 case */
942 if (cp == ~0)
944 readerinput_grow(readerinput, len);
945 memcpy(dest->data + dest->written, src->data + src->cur, len);
946 dest->written += len*sizeof(WCHAR);
947 return hr;
950 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
951 readerinput_grow(readerinput, dest_len);
952 ptr = (WCHAR*)(dest->data + dest->written);
953 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
954 ptr[dest_len] = 0;
955 dest->written += dest_len*sizeof(WCHAR);
956 /* get rid of processed data */
957 readerinput_shrinkraw(readerinput, len);
959 return hr;
962 static inline UINT reader_get_cur(xmlreader *reader)
964 return reader->input->buffer->utf16.cur;
967 static inline WCHAR *reader_get_ptr(xmlreader *reader)
969 encoded_buffer *buffer = &reader->input->buffer->utf16;
970 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
971 if (!*ptr) reader_more(reader);
972 return (WCHAR*)buffer->data + buffer->cur;
975 static int reader_cmp(xmlreader *reader, const WCHAR *str)
977 int i=0;
978 const WCHAR *ptr = reader_get_ptr(reader);
979 while (str[i])
981 if (!ptr[i])
983 reader_more(reader);
984 ptr = reader_get_ptr(reader);
986 if (str[i] != ptr[i])
987 return ptr[i] - str[i];
988 i++;
990 return 0;
993 /* moves cursor n WCHARs forward */
994 static void reader_skipn(xmlreader *reader, int n)
996 encoded_buffer *buffer = &reader->input->buffer->utf16;
997 const WCHAR *ptr = reader_get_ptr(reader);
999 while (*ptr++ && n--)
1001 buffer->cur++;
1002 reader->pos++;
1006 static inline BOOL is_wchar_space(WCHAR ch)
1008 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1011 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1012 static int reader_skipspaces(xmlreader *reader)
1014 encoded_buffer *buffer = &reader->input->buffer->utf16;
1015 const WCHAR *ptr = reader_get_ptr(reader);
1016 UINT start = reader_get_cur(reader);
1018 while (is_wchar_space(*ptr))
1020 if (*ptr == '\r')
1021 reader->pos = 0;
1022 else if (*ptr == '\n')
1024 reader->line++;
1025 reader->pos = 0;
1027 else
1028 reader->pos++;
1030 buffer->cur++;
1031 ptr = reader_get_ptr(reader);
1034 return reader_get_cur(reader) - start;
1037 /* [26] VersionNum ::= '1.' [0-9]+ */
1038 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1040 static const WCHAR onedotW[] = {'1','.',0};
1041 WCHAR *ptr, *ptr2;
1042 UINT start;
1044 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1046 start = reader_get_cur(reader);
1047 /* skip "1." */
1048 reader_skipn(reader, 2);
1050 ptr2 = ptr = reader_get_ptr(reader);
1051 while (*ptr >= '0' && *ptr <= '9')
1053 reader_skipn(reader, 1);
1054 ptr = reader_get_ptr(reader);
1057 if (ptr2 == ptr) return WC_E_DIGIT;
1058 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1059 TRACE("version=%s\n", debug_strval(reader, val));
1060 return S_OK;
1063 /* [25] Eq ::= S? '=' S? */
1064 static HRESULT reader_parse_eq(xmlreader *reader)
1066 static const WCHAR eqW[] = {'=',0};
1067 reader_skipspaces(reader);
1068 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1069 /* skip '=' */
1070 reader_skipn(reader, 1);
1071 reader_skipspaces(reader);
1072 return S_OK;
1075 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1076 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1078 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1079 strval val, name;
1080 HRESULT hr;
1082 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1084 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1085 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1086 /* skip 'version' */
1087 reader_skipn(reader, 7);
1089 hr = reader_parse_eq(reader);
1090 if (FAILED(hr)) return hr;
1092 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1093 return WC_E_QUOTE;
1094 /* skip "'"|'"' */
1095 reader_skipn(reader, 1);
1097 hr = reader_parse_versionnum(reader, &val);
1098 if (FAILED(hr)) return hr;
1100 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1101 return WC_E_QUOTE;
1103 /* skip "'"|'"' */
1104 reader_skipn(reader, 1);
1106 return reader_add_attr(reader, NULL, &name, &val);
1109 /* ([A-Za-z0-9._] | '-') */
1110 static inline BOOL is_wchar_encname(WCHAR ch)
1112 return ((ch >= 'A' && ch <= 'Z') ||
1113 (ch >= 'a' && ch <= 'z') ||
1114 (ch >= '0' && ch <= '9') ||
1115 (ch == '.') || (ch == '_') ||
1116 (ch == '-'));
1119 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1120 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1122 WCHAR *start = reader_get_ptr(reader), *ptr;
1123 xml_encoding enc;
1124 int len;
1126 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1127 return WC_E_ENCNAME;
1129 val->start = reader_get_cur(reader);
1131 ptr = start;
1132 while (is_wchar_encname(*++ptr))
1135 len = ptr - start;
1136 enc = parse_encoding_name(start, len);
1137 TRACE("encoding name %s\n", debugstr_wn(start, len));
1138 val->str = start;
1139 val->len = len;
1141 if (enc == XmlEncoding_Unknown)
1142 return WC_E_ENCNAME;
1144 /* skip encoding name */
1145 reader_skipn(reader, len);
1146 return S_OK;
1149 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1150 static HRESULT reader_parse_encdecl(xmlreader *reader)
1152 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1153 strval name, val;
1154 HRESULT hr;
1156 if (!reader_skipspaces(reader)) return S_FALSE;
1158 if (reader_cmp(reader, encodingW)) return S_FALSE;
1159 name.str = reader_get_ptr(reader);
1160 name.start = reader_get_cur(reader);
1161 name.len = 8;
1162 /* skip 'encoding' */
1163 reader_skipn(reader, 8);
1165 hr = reader_parse_eq(reader);
1166 if (FAILED(hr)) return hr;
1168 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1169 return WC_E_QUOTE;
1170 /* skip "'"|'"' */
1171 reader_skipn(reader, 1);
1173 hr = reader_parse_encname(reader, &val);
1174 if (FAILED(hr)) return hr;
1176 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1177 return WC_E_QUOTE;
1179 /* skip "'"|'"' */
1180 reader_skipn(reader, 1);
1182 return reader_add_attr(reader, NULL, &name, &val);
1185 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1186 static HRESULT reader_parse_sddecl(xmlreader *reader)
1188 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1189 static const WCHAR yesW[] = {'y','e','s',0};
1190 static const WCHAR noW[] = {'n','o',0};
1191 strval name, val;
1192 UINT start;
1193 HRESULT hr;
1195 if (!reader_skipspaces(reader)) return S_FALSE;
1197 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1198 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1199 /* skip 'standalone' */
1200 reader_skipn(reader, 10);
1202 hr = reader_parse_eq(reader);
1203 if (FAILED(hr)) return hr;
1205 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1206 return WC_E_QUOTE;
1207 /* skip "'"|'"' */
1208 reader_skipn(reader, 1);
1210 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1211 return WC_E_XMLDECL;
1213 start = reader_get_cur(reader);
1214 /* skip 'yes'|'no' */
1215 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1216 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1217 TRACE("standalone=%s\n", debug_strval(reader, &val));
1219 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1220 return WC_E_QUOTE;
1221 /* skip "'"|'"' */
1222 reader_skipn(reader, 1);
1224 return reader_add_attr(reader, NULL, &name, &val);
1227 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1228 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1230 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1231 static const WCHAR declcloseW[] = {'?','>',0};
1232 HRESULT hr;
1234 /* check if we have "<?xml " */
1235 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1237 reader_skipn(reader, 5);
1238 hr = reader_parse_versioninfo(reader);
1239 if (FAILED(hr))
1240 return hr;
1242 hr = reader_parse_encdecl(reader);
1243 if (FAILED(hr))
1244 return hr;
1246 hr = reader_parse_sddecl(reader);
1247 if (FAILED(hr))
1248 return hr;
1250 reader_skipspaces(reader);
1251 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1252 reader_skipn(reader, 2);
1254 reader_inc_depth(reader);
1255 reader->nodetype = XmlNodeType_XmlDeclaration;
1256 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1257 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1258 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1260 return S_OK;
1263 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1264 static HRESULT reader_parse_comment(xmlreader *reader)
1266 WCHAR *ptr;
1267 UINT start;
1269 if (reader->resumestate == XmlReadResumeState_Comment)
1271 start = reader->resume[XmlReadResume_Body];
1272 ptr = reader_get_ptr(reader);
1274 else
1276 /* skip '<!--' */
1277 reader_skipn(reader, 4);
1278 reader_shrink(reader);
1279 ptr = reader_get_ptr(reader);
1280 start = reader_get_cur(reader);
1281 reader->nodetype = XmlNodeType_Comment;
1282 reader->resume[XmlReadResume_Body] = start;
1283 reader->resumestate = XmlReadResumeState_Comment;
1284 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1285 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1286 reader_set_strvalue(reader, StringValue_Value, NULL);
1289 /* will exit when there's no more data, it won't attempt to
1290 read more from stream */
1291 while (*ptr)
1293 if (ptr[0] == '-')
1295 if (ptr[1] == '-')
1297 if (ptr[2] == '>')
1299 strval value;
1301 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1302 TRACE("%s\n", debug_strval(reader, &value));
1304 /* skip rest of markup '->' */
1305 reader_skipn(reader, 3);
1307 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1308 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1309 reader_set_strvalue(reader, StringValue_Value, &value);
1310 reader->resume[XmlReadResume_Body] = 0;
1311 reader->resumestate = XmlReadResumeState_Initial;
1312 return S_OK;
1314 else
1315 return WC_E_COMMENT;
1319 reader_skipn(reader, 1);
1320 ptr++;
1323 return S_OK;
1326 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1327 static inline BOOL is_char(WCHAR ch)
1329 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1330 (ch >= 0x20 && ch <= 0xd7ff) ||
1331 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1332 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1333 (ch >= 0xe000 && ch <= 0xfffd);
1336 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1337 static inline BOOL is_pubchar(WCHAR ch)
1339 return (ch == ' ') ||
1340 (ch >= 'a' && ch <= 'z') ||
1341 (ch >= 'A' && ch <= 'Z') ||
1342 (ch >= '0' && ch <= '9') ||
1343 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1344 (ch == '=') || (ch == '?') ||
1345 (ch == '@') || (ch == '!') ||
1346 (ch >= '#' && ch <= '%') || /* #$% */
1347 (ch == '_') || (ch == '\r') || (ch == '\n');
1350 static inline BOOL is_namestartchar(WCHAR ch)
1352 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1353 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1354 (ch >= 0xc0 && ch <= 0xd6) ||
1355 (ch >= 0xd8 && ch <= 0xf6) ||
1356 (ch >= 0xf8 && ch <= 0x2ff) ||
1357 (ch >= 0x370 && ch <= 0x37d) ||
1358 (ch >= 0x37f && ch <= 0x1fff) ||
1359 (ch >= 0x200c && ch <= 0x200d) ||
1360 (ch >= 0x2070 && ch <= 0x218f) ||
1361 (ch >= 0x2c00 && ch <= 0x2fef) ||
1362 (ch >= 0x3001 && ch <= 0xd7ff) ||
1363 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1364 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1365 (ch >= 0xf900 && ch <= 0xfdcf) ||
1366 (ch >= 0xfdf0 && ch <= 0xfffd);
1369 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1370 static inline BOOL is_ncnamechar(WCHAR ch)
1372 return (ch >= 'A' && ch <= 'Z') ||
1373 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1374 (ch == '-') || (ch == '.') ||
1375 (ch >= '0' && ch <= '9') ||
1376 (ch == 0xb7) ||
1377 (ch >= 0xc0 && ch <= 0xd6) ||
1378 (ch >= 0xd8 && ch <= 0xf6) ||
1379 (ch >= 0xf8 && ch <= 0x2ff) ||
1380 (ch >= 0x300 && ch <= 0x36f) ||
1381 (ch >= 0x370 && ch <= 0x37d) ||
1382 (ch >= 0x37f && ch <= 0x1fff) ||
1383 (ch >= 0x200c && ch <= 0x200d) ||
1384 (ch >= 0x203f && ch <= 0x2040) ||
1385 (ch >= 0x2070 && ch <= 0x218f) ||
1386 (ch >= 0x2c00 && ch <= 0x2fef) ||
1387 (ch >= 0x3001 && ch <= 0xd7ff) ||
1388 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1389 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1390 (ch >= 0xf900 && ch <= 0xfdcf) ||
1391 (ch >= 0xfdf0 && ch <= 0xfffd);
1394 static inline BOOL is_namechar(WCHAR ch)
1396 return (ch == ':') || is_ncnamechar(ch);
1399 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1401 /* When we're on attribute always return attribute type, container node type is kept.
1402 Note that container is not necessarily an element, and attribute doesn't mean it's
1403 an attribute in XML spec terms. */
1404 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1407 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1408 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1409 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1410 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1411 [5] Name ::= NameStartChar (NameChar)* */
1412 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1414 WCHAR *ptr;
1415 UINT start;
1417 if (reader->resume[XmlReadResume_Name])
1419 start = reader->resume[XmlReadResume_Name];
1420 ptr = reader_get_ptr(reader);
1422 else
1424 ptr = reader_get_ptr(reader);
1425 start = reader_get_cur(reader);
1426 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1429 while (is_namechar(*ptr))
1431 reader_skipn(reader, 1);
1432 ptr = reader_get_ptr(reader);
1435 if (is_reader_pending(reader))
1437 reader->resume[XmlReadResume_Name] = start;
1438 return E_PENDING;
1440 else
1441 reader->resume[XmlReadResume_Name] = 0;
1443 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1444 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1446 return S_OK;
1449 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1450 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1452 static const WCHAR xmlW[] = {'x','m','l'};
1453 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1454 strval name;
1455 WCHAR *ptr;
1456 HRESULT hr;
1457 UINT i;
1459 hr = reader_parse_name(reader, &name);
1460 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1462 /* now that we got name check for illegal content */
1463 if (strval_eq(reader, &name, &xmlval))
1464 return WC_E_LEADINGXML;
1466 /* PITarget can't be a qualified name */
1467 ptr = reader_get_strptr(reader, &name);
1468 for (i = 0; i < name.len; i++)
1469 if (ptr[i] == ':')
1470 return i ? NC_E_NAMECOLON : WC_E_PI;
1472 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1473 *target = name;
1474 return S_OK;
1477 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1478 static HRESULT reader_parse_pi(xmlreader *reader)
1480 strval target;
1481 WCHAR *ptr;
1482 UINT start;
1483 HRESULT hr;
1485 switch (reader->resumestate)
1487 case XmlReadResumeState_Initial:
1488 /* skip '<?' */
1489 reader_skipn(reader, 2);
1490 reader_shrink(reader);
1491 reader->resumestate = XmlReadResumeState_PITarget;
1492 case XmlReadResumeState_PITarget:
1493 hr = reader_parse_pitarget(reader, &target);
1494 if (FAILED(hr)) return hr;
1495 reader_set_strvalue(reader, StringValue_LocalName, &target);
1496 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1497 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1498 reader->resumestate = XmlReadResumeState_PIBody;
1499 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1500 default:
1504 start = reader->resume[XmlReadResume_Body];
1505 ptr = reader_get_ptr(reader);
1506 while (*ptr)
1508 if (ptr[0] == '?')
1510 if (ptr[1] == '>')
1512 UINT cur = reader_get_cur(reader);
1513 strval value;
1515 /* strip all leading whitespace chars */
1516 while (start < cur)
1518 ptr = reader_get_ptr2(reader, start);
1519 if (!is_wchar_space(*ptr)) break;
1520 start++;
1523 reader_init_strvalue(start, cur-start, &value);
1525 /* skip '?>' */
1526 reader_skipn(reader, 2);
1527 TRACE("%s\n", debug_strval(reader, &value));
1528 reader->nodetype = XmlNodeType_ProcessingInstruction;
1529 reader->resumestate = XmlReadResumeState_Initial;
1530 reader->resume[XmlReadResume_Body] = 0;
1531 reader_set_strvalue(reader, StringValue_Value, &value);
1532 return S_OK;
1536 reader_skipn(reader, 1);
1537 ptr = reader_get_ptr(reader);
1540 return S_OK;
1543 /* This one is used to parse significant whitespace nodes, like in Misc production */
1544 static HRESULT reader_parse_whitespace(xmlreader *reader)
1546 switch (reader->resumestate)
1548 case XmlReadResumeState_Initial:
1549 reader_shrink(reader);
1550 reader->resumestate = XmlReadResumeState_Whitespace;
1551 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1552 reader->nodetype = XmlNodeType_Whitespace;
1553 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1554 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1555 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1556 /* fallthrough */
1557 case XmlReadResumeState_Whitespace:
1559 strval value;
1560 UINT start;
1562 reader_skipspaces(reader);
1563 if (is_reader_pending(reader)) return S_OK;
1565 start = reader->resume[XmlReadResume_Body];
1566 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1567 reader_set_strvalue(reader, StringValue_Value, &value);
1568 TRACE("%s\n", debug_strval(reader, &value));
1569 reader->resumestate = XmlReadResumeState_Initial;
1571 default:
1575 return S_OK;
1578 /* [27] Misc ::= Comment | PI | S */
1579 static HRESULT reader_parse_misc(xmlreader *reader)
1581 HRESULT hr = S_FALSE;
1583 if (reader->resumestate != XmlReadResumeState_Initial)
1585 hr = reader_more(reader);
1586 if (FAILED(hr)) return hr;
1588 /* finish current node */
1589 switch (reader->resumestate)
1591 case XmlReadResumeState_PITarget:
1592 case XmlReadResumeState_PIBody:
1593 return reader_parse_pi(reader);
1594 case XmlReadResumeState_Comment:
1595 return reader_parse_comment(reader);
1596 case XmlReadResumeState_Whitespace:
1597 return reader_parse_whitespace(reader);
1598 default:
1599 ERR("unknown resume state %d\n", reader->resumestate);
1603 while (1)
1605 const WCHAR *cur = reader_get_ptr(reader);
1607 if (is_wchar_space(*cur))
1608 hr = reader_parse_whitespace(reader);
1609 else if (!reader_cmp(reader, commentW))
1610 hr = reader_parse_comment(reader);
1611 else if (!reader_cmp(reader, piW))
1612 hr = reader_parse_pi(reader);
1613 else
1614 break;
1616 if (hr != S_FALSE) return hr;
1619 return hr;
1622 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1623 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1625 WCHAR *cur = reader_get_ptr(reader), quote;
1626 UINT start;
1628 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1630 quote = *cur;
1631 reader_skipn(reader, 1);
1633 cur = reader_get_ptr(reader);
1634 start = reader_get_cur(reader);
1635 while (is_char(*cur) && *cur != quote)
1637 reader_skipn(reader, 1);
1638 cur = reader_get_ptr(reader);
1640 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1641 if (*cur == quote) reader_skipn(reader, 1);
1643 TRACE("%s\n", debug_strval(reader, literal));
1644 return S_OK;
1647 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1648 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1649 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1651 WCHAR *cur = reader_get_ptr(reader), quote;
1652 UINT start;
1654 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1656 quote = *cur;
1657 reader_skipn(reader, 1);
1659 start = reader_get_cur(reader);
1660 cur = reader_get_ptr(reader);
1661 while (is_pubchar(*cur) && *cur != quote)
1663 reader_skipn(reader, 1);
1664 cur = reader_get_ptr(reader);
1666 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1667 if (*cur == quote) reader_skipn(reader, 1);
1669 TRACE("%s\n", debug_strval(reader, literal));
1670 return S_OK;
1673 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1674 static HRESULT reader_parse_externalid(xmlreader *reader)
1676 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1677 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1678 strval name, sys;
1679 HRESULT hr;
1680 int cnt;
1682 if (!reader_cmp(reader, publicW)) {
1683 strval pub;
1685 /* public id */
1686 reader_skipn(reader, 6);
1687 cnt = reader_skipspaces(reader);
1688 if (!cnt) return WC_E_WHITESPACE;
1690 hr = reader_parse_pub_literal(reader, &pub);
1691 if (FAILED(hr)) return hr;
1693 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1694 hr = reader_add_attr(reader, NULL, &name, &pub);
1695 if (FAILED(hr)) return hr;
1697 cnt = reader_skipspaces(reader);
1698 if (!cnt) return S_OK;
1700 /* optional system id */
1701 hr = reader_parse_sys_literal(reader, &sys);
1702 if (FAILED(hr)) return S_OK;
1704 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1705 hr = reader_add_attr(reader, NULL, &name, &sys);
1706 if (FAILED(hr)) return hr;
1708 return S_OK;
1709 } else if (!reader_cmp(reader, systemW)) {
1710 /* system id */
1711 reader_skipn(reader, 6);
1712 cnt = reader_skipspaces(reader);
1713 if (!cnt) return WC_E_WHITESPACE;
1715 hr = reader_parse_sys_literal(reader, &sys);
1716 if (FAILED(hr)) return hr;
1718 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1719 return reader_add_attr(reader, NULL, &name, &sys);
1722 return S_FALSE;
1725 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1726 static HRESULT reader_parse_dtd(xmlreader *reader)
1728 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1729 strval name;
1730 WCHAR *cur;
1731 HRESULT hr;
1733 /* check if we have "<!DOCTYPE" */
1734 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1735 reader_shrink(reader);
1737 /* DTD processing is not allowed by default */
1738 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1740 reader_skipn(reader, 9);
1741 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1743 /* name */
1744 hr = reader_parse_name(reader, &name);
1745 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1747 reader_skipspaces(reader);
1749 hr = reader_parse_externalid(reader);
1750 if (FAILED(hr)) return hr;
1752 reader_skipspaces(reader);
1754 cur = reader_get_ptr(reader);
1755 if (*cur != '>')
1757 FIXME("internal subset parsing not implemented\n");
1758 return E_NOTIMPL;
1761 /* skip '>' */
1762 reader_skipn(reader, 1);
1764 reader->nodetype = XmlNodeType_DocumentType;
1765 reader_set_strvalue(reader, StringValue_LocalName, &name);
1766 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1768 return S_OK;
1771 /* [11 NS] LocalPart ::= NCName */
1772 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1774 WCHAR *ptr;
1775 UINT start;
1777 if (reader->resume[XmlReadResume_Local])
1779 start = reader->resume[XmlReadResume_Local];
1780 ptr = reader_get_ptr(reader);
1782 else
1784 ptr = reader_get_ptr(reader);
1785 start = reader_get_cur(reader);
1788 while (is_ncnamechar(*ptr))
1790 reader_skipn(reader, 1);
1791 ptr = reader_get_ptr(reader);
1794 if (is_reader_pending(reader))
1796 reader->resume[XmlReadResume_Local] = start;
1797 return E_PENDING;
1799 else
1800 reader->resume[XmlReadResume_Local] = 0;
1802 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1804 return S_OK;
1807 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1808 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1809 [9 NS] UnprefixedName ::= LocalPart
1810 [10 NS] Prefix ::= NCName */
1811 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1813 WCHAR *ptr;
1814 UINT start;
1815 HRESULT hr;
1817 if (reader->resume[XmlReadResume_Name])
1819 start = reader->resume[XmlReadResume_Name];
1820 ptr = reader_get_ptr(reader);
1822 else
1824 ptr = reader_get_ptr(reader);
1825 start = reader_get_cur(reader);
1826 reader->resume[XmlReadResume_Name] = start;
1827 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1830 if (reader->resume[XmlReadResume_Local])
1832 hr = reader_parse_local(reader, local);
1833 if (FAILED(hr)) return hr;
1835 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1836 local->start - reader->resume[XmlReadResume_Name] - 1,
1837 prefix);
1839 else
1841 /* skip prefix part */
1842 while (is_ncnamechar(*ptr))
1844 reader_skipn(reader, 1);
1845 ptr = reader_get_ptr(reader);
1848 if (is_reader_pending(reader)) return E_PENDING;
1850 /* got a qualified name */
1851 if (*ptr == ':')
1853 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1855 /* skip ':' */
1856 reader_skipn(reader, 1);
1857 hr = reader_parse_local(reader, local);
1858 if (FAILED(hr)) return hr;
1860 else
1862 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1863 reader_init_strvalue(0, 0, prefix);
1867 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1869 if (prefix->len)
1870 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1871 else
1872 TRACE("ncname %s\n", debug_strval(reader, local));
1874 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1875 /* count ':' too */
1876 (prefix->len ? prefix->len + 1 : 0) + local->len,
1877 qname);
1879 reader->resume[XmlReadResume_Name] = 0;
1880 reader->resume[XmlReadResume_Local] = 0;
1882 return S_OK;
1885 /* Applies normalization rules to a single char, used for attribute values.
1887 Rules include 2 steps:
1889 1) replacing \r\n with a single \n;
1890 2) replacing all whitespace chars with ' '.
1893 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1895 encoded_buffer *buffer = &reader->input->buffer->utf16;
1897 if (!is_wchar_space(*ptr)) return;
1899 if (*ptr == '\r' && *(ptr+1) == '\n')
1901 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1902 memmove(ptr+1, ptr+2, len);
1904 *ptr = ' ';
1907 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1909 static const WCHAR entltW[] = {'l','t'};
1910 static const WCHAR entgtW[] = {'g','t'};
1911 static const WCHAR entampW[] = {'a','m','p'};
1912 static const WCHAR entaposW[] = {'a','p','o','s'};
1913 static const WCHAR entquotW[] = {'q','u','o','t'};
1914 static const strval lt = { (WCHAR*)entltW, 2 };
1915 static const strval gt = { (WCHAR*)entgtW, 2 };
1916 static const strval amp = { (WCHAR*)entampW, 3 };
1917 static const strval apos = { (WCHAR*)entaposW, 4 };
1918 static const strval quot = { (WCHAR*)entquotW, 4 };
1919 WCHAR *str = reader_get_strptr(reader, name);
1921 switch (*str)
1923 case 'l':
1924 if (strval_eq(reader, name, &lt)) return '<';
1925 break;
1926 case 'g':
1927 if (strval_eq(reader, name, &gt)) return '>';
1928 break;
1929 case 'a':
1930 if (strval_eq(reader, name, &amp))
1931 return '&';
1932 else if (strval_eq(reader, name, &apos))
1933 return '\'';
1934 break;
1935 case 'q':
1936 if (strval_eq(reader, name, &quot)) return '\"';
1937 break;
1938 default:
1942 return 0;
1945 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1946 [67] Reference ::= EntityRef | CharRef
1947 [68] EntityRef ::= '&' Name ';' */
1948 static HRESULT reader_parse_reference(xmlreader *reader)
1950 encoded_buffer *buffer = &reader->input->buffer->utf16;
1951 WCHAR *start = reader_get_ptr(reader), *ptr;
1952 UINT cur = reader_get_cur(reader);
1953 WCHAR ch = 0;
1954 int len;
1956 /* skip '&' */
1957 reader_skipn(reader, 1);
1958 ptr = reader_get_ptr(reader);
1960 if (*ptr == '#')
1962 reader_skipn(reader, 1);
1963 ptr = reader_get_ptr(reader);
1965 /* hex char or decimal */
1966 if (*ptr == 'x')
1968 reader_skipn(reader, 1);
1969 ptr = reader_get_ptr(reader);
1971 while (*ptr != ';')
1973 if ((*ptr >= '0' && *ptr <= '9'))
1974 ch = ch*16 + *ptr - '0';
1975 else if ((*ptr >= 'a' && *ptr <= 'f'))
1976 ch = ch*16 + *ptr - 'a' + 10;
1977 else if ((*ptr >= 'A' && *ptr <= 'F'))
1978 ch = ch*16 + *ptr - 'A' + 10;
1979 else
1980 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
1981 reader_skipn(reader, 1);
1982 ptr = reader_get_ptr(reader);
1985 else
1987 while (*ptr != ';')
1989 if ((*ptr >= '0' && *ptr <= '9'))
1991 ch = ch*10 + *ptr - '0';
1992 reader_skipn(reader, 1);
1993 ptr = reader_get_ptr(reader);
1995 else
1996 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2000 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2002 /* normalize */
2003 if (is_wchar_space(ch)) ch = ' ';
2005 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2006 memmove(start+1, ptr+1, len);
2007 buffer->cur = cur + 1;
2009 *start = ch;
2011 else
2013 strval name;
2014 HRESULT hr;
2016 hr = reader_parse_name(reader, &name);
2017 if (FAILED(hr)) return hr;
2019 ptr = reader_get_ptr(reader);
2020 if (*ptr != ';') return WC_E_SEMICOLON;
2022 /* predefined entities resolve to a single character */
2023 ch = get_predefined_entity(reader, &name);
2024 if (ch)
2026 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2027 memmove(start+1, ptr+1, len);
2028 buffer->cur = cur + 1;
2030 *start = ch;
2032 else
2034 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2035 return WC_E_UNDECLAREDENTITY;
2040 return S_OK;
2043 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2044 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2046 WCHAR *ptr, quote;
2047 UINT start;
2049 ptr = reader_get_ptr(reader);
2051 /* skip opening quote */
2052 quote = *ptr;
2053 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2054 reader_skipn(reader, 1);
2056 ptr = reader_get_ptr(reader);
2057 start = reader_get_cur(reader);
2058 while (*ptr)
2060 if (*ptr == '<') return WC_E_LESSTHAN;
2062 if (*ptr == quote)
2064 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2065 /* skip closing quote */
2066 reader_skipn(reader, 1);
2067 return S_OK;
2070 if (*ptr == '&')
2072 HRESULT hr = reader_parse_reference(reader);
2073 if (FAILED(hr)) return hr;
2075 else
2077 reader_normalize_space(reader, ptr);
2078 reader_skipn(reader, 1);
2080 ptr = reader_get_ptr(reader);
2083 return WC_E_QUOTE;
2086 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2087 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2088 [3 NS] DefaultAttName ::= 'xmlns'
2089 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2090 static HRESULT reader_parse_attribute(xmlreader *reader)
2092 strval prefix, local, qname, value;
2093 BOOL ns = FALSE, nsdef = FALSE;
2094 HRESULT hr;
2096 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2097 if (FAILED(hr)) return hr;
2099 if (strval_eq(reader, &prefix, &strval_xmlns))
2100 ns = TRUE;
2102 if (strval_eq(reader, &qname, &strval_xmlns))
2103 ns = nsdef = TRUE;
2105 hr = reader_parse_eq(reader);
2106 if (FAILED(hr)) return hr;
2108 hr = reader_parse_attvalue(reader, &value);
2109 if (FAILED(hr)) return hr;
2111 if (ns)
2112 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2114 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2115 return reader_add_attr(reader, &prefix, &local, &value);
2118 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2119 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2120 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2122 HRESULT hr;
2124 hr = reader_parse_qname(reader, prefix, local, qname);
2125 if (FAILED(hr)) return hr;
2127 while (1)
2129 static const WCHAR endW[] = {'/','>',0};
2131 reader_skipspaces(reader);
2133 /* empty element */
2134 if ((*empty = !reader_cmp(reader, endW)))
2136 /* skip '/>' */
2137 reader_skipn(reader, 2);
2138 reader->is_empty_element = TRUE;
2139 reader->empty_element.prefix = *prefix;
2140 reader->empty_element.localname = *local;
2141 reader->empty_element.qname = *qname;
2142 return S_OK;
2145 /* got a start tag */
2146 if (!reader_cmp(reader, gtW))
2148 /* skip '>' */
2149 reader_skipn(reader, 1);
2150 return reader_push_element(reader, prefix, local, qname);
2153 hr = reader_parse_attribute(reader);
2154 if (FAILED(hr)) return hr;
2157 return S_OK;
2160 /* [39] element ::= EmptyElemTag | STag content ETag */
2161 static HRESULT reader_parse_element(xmlreader *reader)
2163 HRESULT hr;
2165 switch (reader->resumestate)
2167 case XmlReadResumeState_Initial:
2168 /* check if we are really on element */
2169 if (reader_cmp(reader, ltW)) return S_FALSE;
2171 /* skip '<' */
2172 reader_skipn(reader, 1);
2174 reader_shrink(reader);
2175 reader->resumestate = XmlReadResumeState_STag;
2176 case XmlReadResumeState_STag:
2178 strval qname, prefix, local;
2179 int empty = 0;
2181 /* this handles empty elements too */
2182 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2183 if (FAILED(hr)) return hr;
2185 /* FIXME: need to check for defined namespace to reject invalid prefix,
2186 currently reject all prefixes */
2187 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2189 /* if we got empty element and stack is empty go straight to Misc */
2190 if (empty && list_empty(&reader->elements))
2191 reader->instate = XmlReadInState_MiscEnd;
2192 else
2193 reader->instate = XmlReadInState_Content;
2195 reader->nodetype = XmlNodeType_Element;
2196 reader->resumestate = XmlReadResumeState_Initial;
2197 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2198 reader_set_strvalue(reader, StringValue_LocalName, &local);
2199 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2200 break;
2202 default:
2203 hr = E_FAIL;
2206 return hr;
2209 /* [13 NS] ETag ::= '</' QName S? '>' */
2210 static HRESULT reader_parse_endtag(xmlreader *reader)
2212 strval prefix, local, qname;
2213 struct element *elem;
2214 HRESULT hr;
2216 /* skip '</' */
2217 reader_skipn(reader, 2);
2219 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2220 if (FAILED(hr)) return hr;
2222 reader_skipspaces(reader);
2224 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2226 /* skip '>' */
2227 reader_skipn(reader, 1);
2229 /* Element stack should never be empty at this point, cause we shouldn't get to
2230 content parsing if it's empty. */
2231 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2232 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2234 reader_pop_element(reader);
2236 /* It was a root element, the rest is expected as Misc */
2237 if (list_empty(&reader->elements))
2238 reader->instate = XmlReadInState_MiscEnd;
2240 reader->nodetype = XmlNodeType_EndElement;
2241 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2242 reader_set_strvalue(reader, StringValue_LocalName, &local);
2243 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2245 return S_OK;
2248 /* [18] CDSect ::= CDStart CData CDEnd
2249 [19] CDStart ::= '<![CDATA['
2250 [20] CData ::= (Char* - (Char* ']]>' Char*))
2251 [21] CDEnd ::= ']]>' */
2252 static HRESULT reader_parse_cdata(xmlreader *reader)
2254 WCHAR *ptr;
2255 UINT start;
2257 if (reader->resumestate == XmlReadResumeState_CDATA)
2259 start = reader->resume[XmlReadResume_Body];
2260 ptr = reader_get_ptr(reader);
2262 else
2264 /* skip markup '<![CDATA[' */
2265 reader_skipn(reader, 9);
2266 reader_shrink(reader);
2267 ptr = reader_get_ptr(reader);
2268 start = reader_get_cur(reader);
2269 reader->nodetype = XmlNodeType_CDATA;
2270 reader->resume[XmlReadResume_Body] = start;
2271 reader->resumestate = XmlReadResumeState_CDATA;
2272 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2273 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2274 reader_set_strvalue(reader, StringValue_Value, NULL);
2277 while (*ptr)
2279 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2281 strval value;
2283 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2285 /* skip ']]>' */
2286 reader_skipn(reader, 3);
2287 TRACE("%s\n", debug_strval(reader, &value));
2289 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2290 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2291 reader_set_strvalue(reader, StringValue_Value, &value);
2292 reader->resume[XmlReadResume_Body] = 0;
2293 reader->resumestate = XmlReadResumeState_Initial;
2294 return S_OK;
2296 else
2298 /* Value normalization is not fully implemented, rules are:
2300 - single '\r' -> '\n';
2301 - sequence '\r\n' -> '\n', in this case value length changes;
2303 if (*ptr == '\r') *ptr = '\n';
2304 reader_skipn(reader, 1);
2305 ptr++;
2309 return S_OK;
2312 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2313 static HRESULT reader_parse_chardata(xmlreader *reader)
2315 WCHAR *ptr;
2316 UINT start;
2318 if (reader->resumestate == XmlReadResumeState_CharData)
2320 start = reader->resume[XmlReadResume_Body];
2321 ptr = reader_get_ptr(reader);
2323 else
2325 reader_shrink(reader);
2326 ptr = reader_get_ptr(reader);
2327 start = reader_get_cur(reader);
2328 /* There's no text */
2329 if (!*ptr || *ptr == '<') return S_OK;
2330 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2331 reader->resume[XmlReadResume_Body] = start;
2332 reader->resumestate = XmlReadResumeState_CharData;
2333 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2334 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2335 reader_set_strvalue(reader, StringValue_Value, NULL);
2338 while (*ptr)
2340 /* CDATA closing sequence ']]>' is not allowed */
2341 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2342 return WC_E_CDSECTEND;
2344 /* Found next markup part */
2345 if (ptr[0] == '<')
2347 strval value;
2349 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2350 reader_set_strvalue(reader, StringValue_Value, &value);
2351 reader->resume[XmlReadResume_Body] = 0;
2352 reader->resumestate = XmlReadResumeState_Initial;
2353 return S_OK;
2356 reader_skipn(reader, 1);
2358 /* this covers a case when text has leading whitespace chars */
2359 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2360 ptr++;
2363 return S_OK;
2366 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2367 static HRESULT reader_parse_content(xmlreader *reader)
2369 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2370 static const WCHAR etagW[] = {'<','/',0};
2371 static const WCHAR ampW[] = {'&',0};
2373 if (reader->resumestate != XmlReadResumeState_Initial)
2375 switch (reader->resumestate)
2377 case XmlReadResumeState_CDATA:
2378 return reader_parse_cdata(reader);
2379 case XmlReadResumeState_Comment:
2380 return reader_parse_comment(reader);
2381 case XmlReadResumeState_PIBody:
2382 case XmlReadResumeState_PITarget:
2383 return reader_parse_pi(reader);
2384 case XmlReadResumeState_CharData:
2385 return reader_parse_chardata(reader);
2386 default:
2387 ERR("unknown resume state %d\n", reader->resumestate);
2391 reader_shrink(reader);
2393 /* handle end tag here, it indicates end of content as well */
2394 if (!reader_cmp(reader, etagW))
2395 return reader_parse_endtag(reader);
2397 if (!reader_cmp(reader, commentW))
2398 return reader_parse_comment(reader);
2400 if (!reader_cmp(reader, piW))
2401 return reader_parse_pi(reader);
2403 if (!reader_cmp(reader, cdstartW))
2404 return reader_parse_cdata(reader);
2406 if (!reader_cmp(reader, ampW))
2407 return reader_parse_reference(reader);
2409 if (!reader_cmp(reader, ltW))
2410 return reader_parse_element(reader);
2412 /* what's left must be CharData */
2413 return reader_parse_chardata(reader);
2416 static HRESULT reader_parse_nextnode(xmlreader *reader)
2418 HRESULT hr;
2420 if (!is_reader_pending(reader))
2421 reader_clear_attrs(reader);
2423 while (1)
2425 switch (reader->instate)
2427 /* if it's a first call for a new input we need to detect stream encoding */
2428 case XmlReadInState_Initial:
2430 xml_encoding enc;
2432 hr = readerinput_growraw(reader->input);
2433 if (FAILED(hr)) return hr;
2435 /* try to detect encoding by BOM or data and set input code page */
2436 hr = readerinput_detectencoding(reader->input, &enc);
2437 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2438 if (FAILED(hr)) return hr;
2440 /* always switch first time cause we have to put something in */
2441 readerinput_switchencoding(reader->input, enc);
2443 /* parse xml declaration */
2444 hr = reader_parse_xmldecl(reader);
2445 if (FAILED(hr)) return hr;
2447 readerinput_shrinkraw(reader->input, -1);
2448 reader->instate = XmlReadInState_Misc_DTD;
2449 if (hr == S_OK) return hr;
2451 break;
2452 case XmlReadInState_Misc_DTD:
2453 hr = reader_parse_misc(reader);
2454 if (FAILED(hr)) return hr;
2456 if (hr == S_FALSE)
2457 reader->instate = XmlReadInState_DTD;
2458 else
2459 return hr;
2460 break;
2461 case XmlReadInState_DTD:
2462 hr = reader_parse_dtd(reader);
2463 if (FAILED(hr)) return hr;
2465 if (hr == S_OK)
2467 reader->instate = XmlReadInState_DTD_Misc;
2468 return hr;
2470 else
2471 reader->instate = XmlReadInState_Element;
2472 break;
2473 case XmlReadInState_DTD_Misc:
2474 hr = reader_parse_misc(reader);
2475 if (FAILED(hr)) return hr;
2477 if (hr == S_FALSE)
2478 reader->instate = XmlReadInState_Element;
2479 else
2480 return hr;
2481 break;
2482 case XmlReadInState_Element:
2483 return reader_parse_element(reader);
2484 case XmlReadInState_Content:
2485 return reader_parse_content(reader);
2486 case XmlReadInState_MiscEnd:
2487 hr = reader_parse_misc(reader);
2488 if (FAILED(hr)) return hr;
2490 if (hr == S_FALSE)
2491 reader->instate = XmlReadInState_Eof;
2492 return hr;
2493 case XmlReadInState_Eof:
2494 return S_FALSE;
2495 default:
2496 FIXME("internal state %d not handled\n", reader->instate);
2497 return E_NOTIMPL;
2501 return E_NOTIMPL;
2504 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2506 xmlreader *This = impl_from_IXmlReader(iface);
2508 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2510 if (IsEqualGUID(riid, &IID_IUnknown) ||
2511 IsEqualGUID(riid, &IID_IXmlReader))
2513 *ppvObject = iface;
2515 else
2517 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2518 *ppvObject = NULL;
2519 return E_NOINTERFACE;
2522 IXmlReader_AddRef(iface);
2524 return S_OK;
2527 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2529 xmlreader *This = impl_from_IXmlReader(iface);
2530 ULONG ref = InterlockedIncrement(&This->ref);
2531 TRACE("(%p)->(%d)\n", This, ref);
2532 return ref;
2535 static void reader_clear_ns(xmlreader *reader)
2537 struct ns *ns, *ns2;
2539 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2540 reader_free_strvalued(reader, &ns->prefix);
2541 reader_free_strvalued(reader, &ns->uri);
2542 reader_free(reader, ns);
2545 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2546 reader_free_strvalued(reader, &ns->uri);
2547 reader_free(reader, ns);
2551 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2553 xmlreader *This = impl_from_IXmlReader(iface);
2554 LONG ref = InterlockedDecrement(&This->ref);
2556 TRACE("(%p)->(%d)\n", This, ref);
2558 if (ref == 0)
2560 IMalloc *imalloc = This->imalloc;
2561 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2562 if (This->resolver) IXmlResolver_Release(This->resolver);
2563 if (This->mlang) IUnknown_Release(This->mlang);
2564 reader_clear_attrs(This);
2565 reader_clear_ns(This);
2566 reader_clear_elements(This);
2567 reader_free_strvalues(This);
2568 reader_free(This, This);
2569 if (imalloc) IMalloc_Release(imalloc);
2572 return ref;
2575 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2577 xmlreader *This = impl_from_IXmlReader(iface);
2578 IXmlReaderInput *readerinput;
2579 HRESULT hr;
2581 TRACE("(%p)->(%p)\n", This, input);
2583 if (This->input)
2585 readerinput_release_stream(This->input);
2586 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2587 This->input = NULL;
2590 This->line = This->pos = 0;
2591 reader_clear_elements(This);
2592 This->depth = 0;
2593 This->resumestate = XmlReadResumeState_Initial;
2594 memset(This->resume, 0, sizeof(This->resume));
2596 /* just reset current input */
2597 if (!input)
2599 This->state = XmlReadState_Initial;
2600 return S_OK;
2603 /* now try IXmlReaderInput, ISequentialStream, IStream */
2604 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2605 if (hr == S_OK)
2607 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2608 This->input = impl_from_IXmlReaderInput(readerinput);
2609 else
2611 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2612 readerinput, readerinput->lpVtbl);
2613 IUnknown_Release(readerinput);
2614 return E_FAIL;
2619 if (hr != S_OK || !readerinput)
2621 /* create IXmlReaderInput basing on supplied interface */
2622 hr = CreateXmlReaderInputWithEncodingName(input,
2623 This->imalloc, NULL, FALSE, NULL, &readerinput);
2624 if (hr != S_OK) return hr;
2625 This->input = impl_from_IXmlReaderInput(readerinput);
2628 /* set stream for supplied IXmlReaderInput */
2629 hr = readerinput_query_for_stream(This->input);
2630 if (hr == S_OK)
2632 This->state = XmlReadState_Initial;
2633 This->instate = XmlReadInState_Initial;
2636 return hr;
2639 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2641 xmlreader *This = impl_from_IXmlReader(iface);
2643 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2645 if (!value) return E_INVALIDARG;
2647 switch (property)
2649 case XmlReaderProperty_MultiLanguage:
2650 *value = (LONG_PTR)This->mlang;
2651 if (This->mlang)
2652 IUnknown_AddRef(This->mlang);
2653 break;
2654 case XmlReaderProperty_XmlResolver:
2655 *value = (LONG_PTR)This->resolver;
2656 if (This->resolver)
2657 IXmlResolver_AddRef(This->resolver);
2658 break;
2659 case XmlReaderProperty_DtdProcessing:
2660 *value = This->dtdmode;
2661 break;
2662 case XmlReaderProperty_ReadState:
2663 *value = This->state;
2664 break;
2665 default:
2666 FIXME("Unimplemented property (%u)\n", property);
2667 return E_NOTIMPL;
2670 return S_OK;
2673 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2675 xmlreader *This = impl_from_IXmlReader(iface);
2677 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2679 switch (property)
2681 case XmlReaderProperty_MultiLanguage:
2682 if (This->mlang)
2683 IUnknown_Release(This->mlang);
2684 This->mlang = (IUnknown*)value;
2685 if (This->mlang)
2686 IUnknown_AddRef(This->mlang);
2687 if (This->mlang)
2688 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2689 break;
2690 case XmlReaderProperty_XmlResolver:
2691 if (This->resolver)
2692 IXmlResolver_Release(This->resolver);
2693 This->resolver = (IXmlResolver*)value;
2694 if (This->resolver)
2695 IXmlResolver_AddRef(This->resolver);
2696 break;
2697 case XmlReaderProperty_DtdProcessing:
2698 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2699 This->dtdmode = value;
2700 break;
2701 case XmlReaderProperty_MaxElementDepth:
2702 FIXME("Ignoring MaxElementDepth %ld\n", value);
2703 break;
2704 default:
2705 FIXME("Unimplemented property (%u)\n", property);
2706 return E_NOTIMPL;
2709 return S_OK;
2712 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2714 xmlreader *This = impl_from_IXmlReader(iface);
2715 XmlNodeType oldtype = This->nodetype;
2716 HRESULT hr;
2718 TRACE("(%p)->(%p)\n", This, nodetype);
2720 if (This->state == XmlReadState_Closed) return S_FALSE;
2722 hr = reader_parse_nextnode(This);
2723 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2724 This->state = XmlReadState_Interactive;
2725 if (hr == S_OK)
2727 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2728 *nodetype = This->nodetype;
2731 return hr;
2734 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2736 xmlreader *This = impl_from_IXmlReader(iface);
2737 TRACE("(%p)->(%p)\n", This, node_type);
2739 *node_type = reader_get_nodetype(This);
2740 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2743 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2745 xmlreader *This = impl_from_IXmlReader(iface);
2747 TRACE("(%p)\n", This);
2749 if (!This->attr_count) return S_FALSE;
2750 This->attr = LIST_ENTRY(list_head(&This->attrs), struct attribute, entry);
2751 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2752 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2754 return S_OK;
2757 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2759 xmlreader *This = impl_from_IXmlReader(iface);
2760 const struct list *next;
2762 TRACE("(%p)\n", This);
2764 if (!This->attr_count) return S_FALSE;
2766 if (!This->attr)
2767 return IXmlReader_MoveToFirstAttribute(iface);
2769 next = list_next(&This->attrs, &This->attr->entry);
2770 if (next)
2772 This->attr = LIST_ENTRY(next, struct attribute, entry);
2773 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2774 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2777 return next ? S_OK : S_FALSE;
2780 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2781 LPCWSTR local_name,
2782 LPCWSTR namespaceUri)
2784 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2785 return E_NOTIMPL;
2788 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2790 xmlreader *This = impl_from_IXmlReader(iface);
2792 TRACE("(%p)\n", This);
2794 if (!This->attr_count) return S_FALSE;
2795 This->attr = NULL;
2797 /* FIXME: support other node types with 'attributes' like DTD */
2798 if (This->is_empty_element) {
2799 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2800 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2802 else {
2803 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2804 if (element) {
2805 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2806 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2810 return S_OK;
2813 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2815 xmlreader *This = impl_from_IXmlReader(iface);
2817 TRACE("(%p)->(%p %p)\n", This, name, len);
2818 *name = This->strvalues[StringValue_QualifiedName].str;
2819 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2820 return S_OK;
2823 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2824 LPCWSTR *namespaceUri,
2825 UINT *namespaceUri_length)
2827 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2828 return E_NOTIMPL;
2831 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2833 xmlreader *This = impl_from_IXmlReader(iface);
2835 TRACE("(%p)->(%p %p)\n", This, name, len);
2836 *name = This->strvalues[StringValue_LocalName].str;
2837 if (len) *len = This->strvalues[StringValue_LocalName].len;
2838 return S_OK;
2841 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2843 xmlreader *This = impl_from_IXmlReader(iface);
2845 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2846 *prefix = This->strvalues[StringValue_Prefix].str;
2847 if (len) *len = This->strvalues[StringValue_Prefix].len;
2848 return S_OK;
2851 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2853 xmlreader *reader = impl_from_IXmlReader(iface);
2854 strval *val = &reader->strvalues[StringValue_Value];
2856 TRACE("(%p)->(%p %p)\n", reader, value, len);
2858 *value = NULL;
2860 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2862 XmlNodeType type;
2863 HRESULT hr;
2865 hr = IXmlReader_Read(iface, &type);
2866 if (FAILED(hr)) return hr;
2868 /* return if still pending, partially read values are not reported */
2869 if (is_reader_pending(reader)) return E_PENDING;
2872 if (!val->str)
2874 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2875 if (!ptr) return E_OUTOFMEMORY;
2876 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2877 ptr[val->len] = 0;
2878 val->str = ptr;
2881 *value = val->str;
2882 if (len) *len = val->len;
2883 return S_OK;
2886 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2888 xmlreader *reader = impl_from_IXmlReader(iface);
2889 strval *val = &reader->strvalues[StringValue_Value];
2890 UINT len;
2892 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2894 /* Value is already allocated, chunked reads are not possible. */
2895 if (val->str) return S_FALSE;
2897 if (val->len)
2899 len = min(chunk_size, val->len);
2900 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2901 val->start += len;
2902 val->len -= len;
2903 if (read) *read = len;
2906 return S_OK;
2909 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2910 LPCWSTR *baseUri,
2911 UINT *baseUri_length)
2913 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2914 return E_NOTIMPL;
2917 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2919 FIXME("(%p): stub\n", iface);
2920 return FALSE;
2923 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2925 xmlreader *This = impl_from_IXmlReader(iface);
2926 TRACE("(%p)\n", This);
2927 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2928 when current node is start tag of an element */
2929 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
2932 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2934 xmlreader *This = impl_from_IXmlReader(iface);
2936 TRACE("(%p %p)\n", This, lineNumber);
2938 if (!lineNumber) return E_INVALIDARG;
2940 *lineNumber = This->line;
2942 return S_OK;
2945 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
2947 xmlreader *This = impl_from_IXmlReader(iface);
2949 TRACE("(%p %p)\n", This, linePosition);
2951 if (!linePosition) return E_INVALIDARG;
2953 *linePosition = This->pos;
2955 return S_OK;
2958 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
2960 xmlreader *This = impl_from_IXmlReader(iface);
2962 TRACE("(%p)->(%p)\n", This, count);
2964 if (!count) return E_INVALIDARG;
2966 *count = This->attr_count;
2967 return S_OK;
2970 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
2972 xmlreader *This = impl_from_IXmlReader(iface);
2973 TRACE("(%p)->(%p)\n", This, depth);
2974 *depth = This->depth;
2975 return S_OK;
2978 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
2980 FIXME("(%p): stub\n", iface);
2981 return FALSE;
2984 static const struct IXmlReaderVtbl xmlreader_vtbl =
2986 xmlreader_QueryInterface,
2987 xmlreader_AddRef,
2988 xmlreader_Release,
2989 xmlreader_SetInput,
2990 xmlreader_GetProperty,
2991 xmlreader_SetProperty,
2992 xmlreader_Read,
2993 xmlreader_GetNodeType,
2994 xmlreader_MoveToFirstAttribute,
2995 xmlreader_MoveToNextAttribute,
2996 xmlreader_MoveToAttributeByName,
2997 xmlreader_MoveToElement,
2998 xmlreader_GetQualifiedName,
2999 xmlreader_GetNamespaceUri,
3000 xmlreader_GetLocalName,
3001 xmlreader_GetPrefix,
3002 xmlreader_GetValue,
3003 xmlreader_ReadValueChunk,
3004 xmlreader_GetBaseUri,
3005 xmlreader_IsDefault,
3006 xmlreader_IsEmptyElement,
3007 xmlreader_GetLineNumber,
3008 xmlreader_GetLinePosition,
3009 xmlreader_GetAttributeCount,
3010 xmlreader_GetDepth,
3011 xmlreader_IsEOF
3014 /** IXmlReaderInput **/
3015 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3017 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3019 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3021 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3022 IsEqualGUID(riid, &IID_IUnknown))
3024 *ppvObject = iface;
3026 else
3028 WARN("interface %s not implemented\n", debugstr_guid(riid));
3029 *ppvObject = NULL;
3030 return E_NOINTERFACE;
3033 IUnknown_AddRef(iface);
3035 return S_OK;
3038 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3040 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3041 ULONG ref = InterlockedIncrement(&This->ref);
3042 TRACE("(%p)->(%d)\n", This, ref);
3043 return ref;
3046 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3048 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3049 LONG ref = InterlockedDecrement(&This->ref);
3051 TRACE("(%p)->(%d)\n", This, ref);
3053 if (ref == 0)
3055 IMalloc *imalloc = This->imalloc;
3056 if (This->input) IUnknown_Release(This->input);
3057 if (This->stream) ISequentialStream_Release(This->stream);
3058 if (This->buffer) free_input_buffer(This->buffer);
3059 readerinput_free(This, This->baseuri);
3060 readerinput_free(This, This);
3061 if (imalloc) IMalloc_Release(imalloc);
3064 return ref;
3067 static const struct IUnknownVtbl xmlreaderinputvtbl =
3069 xmlreaderinput_QueryInterface,
3070 xmlreaderinput_AddRef,
3071 xmlreaderinput_Release
3074 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3076 xmlreader *reader;
3077 int i;
3079 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3081 if (!IsEqualGUID(riid, &IID_IXmlReader))
3083 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3084 return E_FAIL;
3087 if (imalloc)
3088 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3089 else
3090 reader = heap_alloc(sizeof(*reader));
3091 if(!reader) return E_OUTOFMEMORY;
3093 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3094 reader->ref = 1;
3095 reader->input = NULL;
3096 reader->state = XmlReadState_Closed;
3097 reader->instate = XmlReadInState_Initial;
3098 reader->resumestate = XmlReadResumeState_Initial;
3099 reader->dtdmode = DtdProcessing_Prohibit;
3100 reader->resolver = NULL;
3101 reader->mlang = NULL;
3102 reader->line = reader->pos = 0;
3103 reader->imalloc = imalloc;
3104 if (imalloc) IMalloc_AddRef(imalloc);
3105 reader->nodetype = XmlNodeType_None;
3106 list_init(&reader->attrs);
3107 reader->attr_count = 0;
3108 reader->attr = NULL;
3109 list_init(&reader->nsdef);
3110 list_init(&reader->ns);
3111 list_init(&reader->elements);
3112 reader->depth = 0;
3113 reader->max_depth = 256;
3114 reader->is_empty_element = FALSE;
3115 memset(reader->resume, 0, sizeof(reader->resume));
3117 for (i = 0; i < StringValue_Last; i++)
3118 reader->strvalues[i] = strval_empty;
3120 *obj = &reader->IXmlReader_iface;
3122 TRACE("returning iface %p\n", *obj);
3124 return S_OK;
3127 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3128 IMalloc *imalloc,
3129 LPCWSTR encoding,
3130 BOOL hint,
3131 LPCWSTR base_uri,
3132 IXmlReaderInput **ppInput)
3134 xmlreaderinput *readerinput;
3135 HRESULT hr;
3137 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3138 hint, wine_dbgstr_w(base_uri), ppInput);
3140 if (!stream || !ppInput) return E_INVALIDARG;
3142 if (imalloc)
3143 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3144 else
3145 readerinput = heap_alloc(sizeof(*readerinput));
3146 if(!readerinput) return E_OUTOFMEMORY;
3148 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3149 readerinput->ref = 1;
3150 readerinput->imalloc = imalloc;
3151 readerinput->stream = NULL;
3152 if (imalloc) IMalloc_AddRef(imalloc);
3153 readerinput->encoding = parse_encoding_name(encoding, -1);
3154 readerinput->hint = hint;
3155 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3156 readerinput->pending = 0;
3158 hr = alloc_input_buffer(readerinput);
3159 if (hr != S_OK)
3161 readerinput_free(readerinput, readerinput->baseuri);
3162 readerinput_free(readerinput, readerinput);
3163 if (imalloc) IMalloc_Release(imalloc);
3164 return hr;
3166 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3168 *ppInput = &readerinput->IXmlReaderInput_iface;
3170 TRACE("returning iface %p\n", *ppInput);
3172 return S_OK;