xmllite: Free prefix and local name too when clearing element list.
[wine.git] / dlls / xmllite / reader.c
blob09c8e30623deb6a44b6af7b5982b77155b5c4fd0
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include "windef.h"
26 #include "winbase.h"
27 #include "initguid.h"
28 #include "objbase.h"
29 #include "xmllite.h"
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parsing state of the reader; values are named after the
   document section they refer to. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records which construct was being parsed when an input problem
   interrupted parsing; the reader uses it to resume. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Index of a saved input position used to resume from a particular place. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last   /* number of slots, not a real index */
} XmlReaderResume;
/* Index into the reader's per-node string value array. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last /* number of slots, not a real index */
} XmlReaderStringValue;
86 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW[] = {'\"',0};
90 static const WCHAR quoteW[] = {'\'',0};
91 static const WCHAR ltW[] = {'<',0};
92 static const WCHAR gtW[] = {'>',0};
93 static const WCHAR commentW[] = {'<','!','-','-',0};
94 static const WCHAR piW[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype)
98 static const char * const type_names[] =
100 "None",
101 "Element",
102 "Attribute",
103 "Text",
104 "CDATA",
107 "ProcessingInstruction",
108 "Comment",
110 "DocumentType",
113 "Whitespace",
115 "EndElement",
117 "XmlDeclaration"
120 if (nodetype > _XmlNodeType_Last)
121 return wine_dbg_sprintf("unknown type=%d", nodetype);
123 return type_names[nodetype];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop)
128 static const char * const prop_names[] =
130 "MultiLanguage",
131 "ConformanceLevel",
132 "RandomAccess",
133 "XmlResolver",
134 "DtdProcessing",
135 "ReadState",
136 "MaxElementDepth",
137 "MaxEntityExpansion"
140 if (prop > _XmlReaderProperty_Last)
141 return wine_dbg_sprintf("unknown property=%d", prop);
143 return prop_names[prop];
146 struct xml_encoding_data
148 const WCHAR *name;
149 xml_encoding enc;
150 UINT cp;
153 static const struct xml_encoding_data xml_encoding_map[] = {
154 { utf16W, XmlEncoding_UTF16, ~0 },
155 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
158 const WCHAR *get_encoding_name(xml_encoding encoding)
160 return xml_encoding_map[encoding].name;
163 xml_encoding get_encoding_from_codepage(UINT codepage)
165 int i;
166 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
168 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
170 return XmlEncoding_Unknown;
173 typedef struct
175 char *data;
176 UINT cur;
177 unsigned int allocated;
178 unsigned int written;
179 } encoded_buffer;
181 typedef struct input_buffer input_buffer;
183 typedef struct
185 IXmlReaderInput IXmlReaderInput_iface;
186 LONG ref;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
188 IUnknown *input;
189 IMalloc *imalloc;
190 xml_encoding encoding;
191 BOOL hint;
192 WCHAR *baseuri;
193 /* stream reference set after SetInput() call from reader,
194 stored as sequential stream, cause currently
195 optimizations possible with IStream aren't implemented */
196 ISequentialStream *stream;
197 input_buffer *buffer;
198 unsigned int pending : 1;
199 } xmlreaderinput;
201 static const struct IUnknownVtbl xmlreaderinputvtbl;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
211 typedef struct
213 WCHAR *str; /* allocated null-terminated string */
214 UINT len; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start; /* input position where value starts */
216 } strval;
218 static WCHAR emptyW[] = {0};
219 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
220 static const strval strval_empty = { emptyW };
221 static const strval strval_xmlns = { xmlnsW, 5 };
223 struct attribute
225 struct list entry;
226 strval prefix;
227 strval localname;
228 strval value;
231 struct element
233 struct list entry;
234 strval prefix;
235 strval localname;
236 strval qname;
239 struct ns
241 struct list entry;
242 strval prefix;
243 strval uri;
244 struct element *element;
247 typedef struct
249 IXmlReader IXmlReader_iface;
250 LONG ref;
251 xmlreaderinput *input;
252 IMalloc *imalloc;
253 XmlReadState state;
254 XmlReaderInternalState instate;
255 XmlReaderResumeState resumestate;
256 XmlNodeType nodetype;
257 DtdProcessing dtdmode;
258 IXmlResolver *resolver;
259 IUnknown *mlang;
260 UINT line, pos; /* reader position in XML stream */
261 struct list attrs; /* attributes list for current node */
262 struct attribute *attr; /* current attribute */
263 UINT attr_count;
264 struct list nsdef;
265 struct list ns;
266 struct list elements;
267 strval strvalues[StringValue_Last];
268 UINT depth;
269 UINT max_depth;
270 BOOL is_empty_element;
271 struct element empty_element;
272 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
273 } xmlreader;
275 struct input_buffer
277 encoded_buffer utf16;
278 encoded_buffer encoded;
279 UINT code_page;
280 xmlreaderinput *input;
283 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
285 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
288 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
290 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
293 /* reader memory allocation functions */
294 static inline void *reader_alloc(xmlreader *reader, size_t len)
296 return m_alloc(reader->imalloc, len);
299 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
301 void *ret = reader_alloc(reader, len);
302 if (ret)
303 memset(ret, 0, len);
304 return ret;
307 static inline void reader_free(xmlreader *reader, void *mem)
309 m_free(reader->imalloc, mem);
312 /* Just return pointer from offset, no attempt to read more. */
313 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
315 encoded_buffer *buffer = &reader->input->buffer->utf16;
316 return (WCHAR*)buffer->data + offset;
319 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
321 return v->str ? v->str : reader_get_ptr2(reader, v->start);
324 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
326 *dest = *src;
328 if (src->str != strval_empty.str)
330 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
331 if (!dest->str) return E_OUTOFMEMORY;
332 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
333 dest->str[dest->len] = 0;
334 dest->start = 0;
337 return S_OK;
340 /* reader input memory allocation functions */
341 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
343 return m_alloc(input->imalloc, len);
346 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
348 return m_realloc(input->imalloc, mem, len);
351 static inline void readerinput_free(xmlreaderinput *input, void *mem)
353 m_free(input->imalloc, mem);
356 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
358 LPWSTR ret = NULL;
360 if(str) {
361 DWORD size;
363 size = (strlenW(str)+1)*sizeof(WCHAR);
364 ret = readerinput_alloc(input, size);
365 if (ret) memcpy(ret, str, size);
368 return ret;
371 static void reader_clear_attrs(xmlreader *reader)
373 struct attribute *attr, *attr2;
374 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
376 reader_free(reader, attr);
378 list_init(&reader->attrs);
379 reader->attr_count = 0;
380 reader->attr = NULL;
383 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
384 while we are on a node with attributes */
385 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
387 struct attribute *attr;
389 attr = reader_alloc(reader, sizeof(*attr));
390 if (!attr) return E_OUTOFMEMORY;
392 if (prefix)
393 attr->prefix = *prefix;
394 else
395 memset(&attr->prefix, 0, sizeof(attr->prefix));
396 attr->localname = *localname;
397 attr->value = *value;
398 list_add_tail(&reader->attrs, &attr->entry);
399 reader->attr_count++;
401 return S_OK;
404 /* This one frees stored string value if needed */
405 static void reader_free_strvalued(xmlreader *reader, strval *v)
407 if (v->str != strval_empty.str)
409 reader_free(reader, v->str);
410 *v = strval_empty;
414 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
416 v->start = start;
417 v->len = len;
418 v->str = NULL;
421 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
423 return debugstr_wn(reader_get_strptr(reader, v), v->len);
426 /* used to initialize from constant string */
427 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
429 v->start = 0;
430 v->len = len;
431 v->str = str;
434 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
436 reader_free_strvalued(reader, &reader->strvalues[type]);
439 static void reader_free_strvalues(xmlreader *reader)
441 int type;
442 for (type = 0; type < StringValue_Last; type++)
443 reader_free_strvalue(reader, type);
446 /* This helper should only be used to test if strings are the same,
447 it doesn't try to sort. */
448 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
450 if (str1->len != str2->len) return 0;
451 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
454 static void reader_clear_elements(xmlreader *reader)
456 struct element *elem, *elem2;
457 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
459 reader_free_strvalued(reader, &elem->prefix);
460 reader_free_strvalued(reader, &elem->localname);
461 reader_free_strvalued(reader, &elem->qname);
462 reader_free(reader, elem);
464 list_init(&reader->elements);
465 reader->is_empty_element = FALSE;
468 static HRESULT reader_inc_depth(xmlreader *reader)
470 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
471 return S_OK;
474 static void reader_dec_depth(xmlreader *reader)
476 if (reader->depth > 1) reader->depth--;
479 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
481 struct ns *ns;
482 HRESULT hr;
484 ns = reader_alloc(reader, sizeof(*ns));
485 if (!ns) return E_OUTOFMEMORY;
487 if (def)
488 memset(&ns->prefix, 0, sizeof(ns->prefix));
489 else {
490 hr = reader_strvaldup(reader, prefix, &ns->prefix);
491 if (FAILED(hr)) {
492 reader_free(reader, ns);
493 return hr;
497 hr = reader_strvaldup(reader, uri, &ns->uri);
498 if (FAILED(hr)) {
499 reader_free_strvalued(reader, &ns->prefix);
500 reader_free(reader, ns);
501 return hr;
504 ns->element = NULL;
505 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
506 return hr;
509 static void reader_free_element(xmlreader *reader, struct element *element)
511 reader_free_strvalued(reader, &element->prefix);
512 reader_free_strvalued(reader, &element->localname);
513 reader_free_strvalued(reader, &element->qname);
514 reader_free(reader, element);
517 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
519 struct ns *ns;
521 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
522 if (ns->element)
523 break;
524 ns->element = element;
527 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
528 if (ns->element)
529 break;
530 ns->element = element;
534 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
535 strval *qname)
537 struct element *element;
538 HRESULT hr;
540 if (!list_empty(&reader->elements))
542 hr = reader_inc_depth(reader);
543 if (FAILED(hr))
544 return hr;
547 element = reader_alloc_zero(reader, sizeof(*element));
548 if (!element)
549 goto failed;
551 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
552 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
553 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
555 reader_free_element(reader, element);
556 goto failed;
559 list_add_head(&reader->elements, &element->entry);
560 reader_mark_ns_nodes(reader, element);
561 reader->is_empty_element = FALSE;
563 failed:
564 reader_dec_depth(reader);
565 return hr;
568 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
570 struct ns *ns, *ns2;
572 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
573 if (ns->element != element)
574 break;
576 list_remove(&ns->entry);
577 reader_free_strvalued(reader, &ns->prefix);
578 reader_free_strvalued(reader, &ns->uri);
579 reader_free(reader, ns);
582 if (!list_empty(&reader->nsdef)) {
583 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
584 if (ns->element == element) {
585 list_remove(&ns->entry);
586 reader_free_strvalued(reader, &ns->prefix);
587 reader_free_strvalued(reader, &ns->uri);
588 reader_free(reader, ns);
593 static void reader_pop_element(xmlreader *reader)
595 struct element *element;
597 if (list_empty(&reader->elements))
598 return;
600 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
601 list_remove(&element->entry);
603 reader_pop_ns_nodes(reader, element);
604 reader_free_element(reader, element);
605 reader_dec_depth(reader);
607 /* It was a root element, the rest is expected as Misc */
608 if (list_empty(&reader->elements))
609 reader->instate = XmlReadInState_MiscEnd;
612 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
613 means node value is to be determined. */
614 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
616 strval *v = &reader->strvalues[type];
618 reader_free_strvalue(reader, type);
619 if (!value)
621 v->str = NULL;
622 v->start = 0;
623 v->len = 0;
624 return;
627 if (value->str == strval_empty.str)
628 *v = *value;
629 else
631 if (type == StringValue_Value)
633 /* defer allocation for value string */
634 v->str = NULL;
635 v->start = value->start;
636 v->len = value->len;
638 else
640 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
641 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
642 v->str[value->len] = 0;
643 v->len = value->len;
648 static inline int is_reader_pending(xmlreader *reader)
650 return reader->input->pending;
653 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
655 const int initial_len = 0x2000;
656 buffer->data = readerinput_alloc(input, initial_len);
657 if (!buffer->data) return E_OUTOFMEMORY;
659 memset(buffer->data, 0, 4);
660 buffer->cur = 0;
661 buffer->allocated = initial_len;
662 buffer->written = 0;
664 return S_OK;
667 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
669 readerinput_free(input, buffer->data);
672 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
674 if (encoding == XmlEncoding_Unknown)
676 FIXME("unsupported encoding %d\n", encoding);
677 return E_NOTIMPL;
680 *cp = xml_encoding_map[encoding].cp;
682 return S_OK;
685 xml_encoding parse_encoding_name(const WCHAR *name, int len)
687 int min, max, n, c;
689 if (!name) return XmlEncoding_Unknown;
691 min = 0;
692 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
694 while (min <= max)
696 n = (min+max)/2;
698 if (len != -1)
699 c = strncmpiW(xml_encoding_map[n].name, name, len);
700 else
701 c = strcmpiW(xml_encoding_map[n].name, name);
702 if (!c)
703 return xml_encoding_map[n].enc;
705 if (c > 0)
706 max = n-1;
707 else
708 min = n+1;
711 return XmlEncoding_Unknown;
714 static HRESULT alloc_input_buffer(xmlreaderinput *input)
716 input_buffer *buffer;
717 HRESULT hr;
719 input->buffer = NULL;
721 buffer = readerinput_alloc(input, sizeof(*buffer));
722 if (!buffer) return E_OUTOFMEMORY;
724 buffer->input = input;
725 buffer->code_page = ~0; /* code page is unknown at this point */
726 hr = init_encoded_buffer(input, &buffer->utf16);
727 if (hr != S_OK) {
728 readerinput_free(input, buffer);
729 return hr;
732 hr = init_encoded_buffer(input, &buffer->encoded);
733 if (hr != S_OK) {
734 free_encoded_buffer(input, &buffer->utf16);
735 readerinput_free(input, buffer);
736 return hr;
739 input->buffer = buffer;
740 return S_OK;
743 static void free_input_buffer(input_buffer *buffer)
745 free_encoded_buffer(buffer->input, &buffer->encoded);
746 free_encoded_buffer(buffer->input, &buffer->utf16);
747 readerinput_free(buffer->input, buffer);
750 static void readerinput_release_stream(xmlreaderinput *readerinput)
752 if (readerinput->stream) {
753 ISequentialStream_Release(readerinput->stream);
754 readerinput->stream = NULL;
758 /* Queries already stored interface for IStream/ISequentialStream.
759 Interface supplied on creation will be overwritten */
760 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
762 HRESULT hr;
764 readerinput_release_stream(readerinput);
765 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
766 if (hr != S_OK)
767 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
769 return hr;
772 /* reads a chunk to raw buffer */
773 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
775 encoded_buffer *buffer = &readerinput->buffer->encoded;
776 /* to make sure aligned length won't exceed allocated length */
777 ULONG len = buffer->allocated - buffer->written - 4;
778 ULONG read;
779 HRESULT hr;
781 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
782 variable width encodings like UTF-8 */
783 len = (len + 3) & ~3;
784 /* try to use allocated space or grow */
785 if (buffer->allocated - buffer->written < len)
787 buffer->allocated *= 2;
788 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
789 len = buffer->allocated - buffer->written;
792 read = 0;
793 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
794 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
795 readerinput->pending = hr == E_PENDING;
796 if (FAILED(hr)) return hr;
797 buffer->written += read;
799 return hr;
802 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
803 static void readerinput_grow(xmlreaderinput *readerinput, int length)
805 encoded_buffer *buffer = &readerinput->buffer->utf16;
807 length *= sizeof(WCHAR);
808 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
809 if (buffer->allocated < buffer->written + length + 4)
811 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
812 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
813 buffer->allocated = grown_size;
817 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
819 static const char startA[] = {'<','?'};
820 static const char commentA[] = {'<','!'};
821 encoded_buffer *buffer = &readerinput->buffer->encoded;
822 unsigned char *ptr = (unsigned char*)buffer->data;
824 return !memcmp(buffer->data, startA, sizeof(startA)) ||
825 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
826 /* test start byte */
827 (ptr[0] == '<' &&
829 (ptr[1] && (ptr[1] <= 0x7f)) ||
830 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
831 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
832 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
836 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
838 encoded_buffer *buffer = &readerinput->buffer->encoded;
839 static const WCHAR startW[] = {'<','?'};
840 static const WCHAR commentW[] = {'<','!'};
841 static const char utf8bom[] = {0xef,0xbb,0xbf};
842 static const char utf16lebom[] = {0xff,0xfe};
844 *enc = XmlEncoding_Unknown;
846 if (buffer->written <= 3)
848 HRESULT hr = readerinput_growraw(readerinput);
849 if (FAILED(hr)) return hr;
850 if (buffer->written <= 3) return MX_E_INPUTEND;
853 /* try start symbols if we have enough data to do that, input buffer should contain
854 first chunk already */
855 if (readerinput_is_utf8(readerinput))
856 *enc = XmlEncoding_UTF8;
857 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
858 !memcmp(buffer->data, commentW, sizeof(commentW)))
859 *enc = XmlEncoding_UTF16;
860 /* try with BOM now */
861 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
863 buffer->cur += sizeof(utf8bom);
864 *enc = XmlEncoding_UTF8;
866 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
868 buffer->cur += sizeof(utf16lebom);
869 *enc = XmlEncoding_UTF16;
872 return S_OK;
875 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
877 encoded_buffer *buffer = &readerinput->buffer->encoded;
878 int len = buffer->written;
880 /* complete single byte char */
881 if (!(buffer->data[len-1] & 0x80)) return len;
883 /* find start byte of multibyte char */
884 while (--len && !(buffer->data[len] & 0xc0))
887 return len;
890 /* Returns byte length of complete char sequence for buffer code page,
891 it's relative to current buffer position which is currently used for BOM handling
892 only. */
893 static int readerinput_get_convlen(xmlreaderinput *readerinput)
895 encoded_buffer *buffer = &readerinput->buffer->encoded;
896 int len;
898 if (readerinput->buffer->code_page == CP_UTF8)
899 len = readerinput_get_utf8_convlen(readerinput);
900 else
901 len = buffer->written;
903 TRACE("%d\n", len - buffer->cur);
904 return len - buffer->cur;
907 /* It's possible that raw buffer has some leftovers from last conversion - some char
908 sequence that doesn't represent a full code point. Length argument should be calculated with
909 readerinput_get_convlen(), if it's -1 it will be calculated here. */
910 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
912 encoded_buffer *buffer = &readerinput->buffer->encoded;
914 if (len == -1)
915 len = readerinput_get_convlen(readerinput);
917 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
918 /* everything below cur is lost too */
919 buffer->written -= len + buffer->cur;
920 /* after this point we don't need cur offset really,
921 it's used only to mark where actual data begins when first chunk is read */
922 buffer->cur = 0;
925 /* note that raw buffer content is kept */
926 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
928 encoded_buffer *src = &readerinput->buffer->encoded;
929 encoded_buffer *dest = &readerinput->buffer->utf16;
930 int len, dest_len;
931 HRESULT hr;
932 WCHAR *ptr;
933 UINT cp;
935 hr = get_code_page(enc, &cp);
936 if (FAILED(hr)) return;
938 readerinput->buffer->code_page = cp;
939 len = readerinput_get_convlen(readerinput);
941 TRACE("switching to cp %d\n", cp);
943 /* just copy in this case */
944 if (enc == XmlEncoding_UTF16)
946 readerinput_grow(readerinput, len);
947 memcpy(dest->data, src->data + src->cur, len);
948 dest->written += len*sizeof(WCHAR);
949 return;
952 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
953 readerinput_grow(readerinput, dest_len);
954 ptr = (WCHAR*)dest->data;
955 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
956 ptr[dest_len] = 0;
957 dest->written += dest_len*sizeof(WCHAR);
960 /* shrinks parsed data a buffer begins with */
961 static void reader_shrink(xmlreader *reader)
963 encoded_buffer *buffer = &reader->input->buffer->utf16;
965 /* avoid to move too often using threshold shrink length */
966 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
968 buffer->written -= buffer->cur*sizeof(WCHAR);
969 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
970 buffer->cur = 0;
971 *(WCHAR*)&buffer->data[buffer->written] = 0;
975 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
976 It won't attempt to shrink but will grow destination buffer if needed */
977 static HRESULT reader_more(xmlreader *reader)
979 xmlreaderinput *readerinput = reader->input;
980 encoded_buffer *src = &readerinput->buffer->encoded;
981 encoded_buffer *dest = &readerinput->buffer->utf16;
982 UINT cp = readerinput->buffer->code_page;
983 int len, dest_len;
984 HRESULT hr;
985 WCHAR *ptr;
987 /* get some raw data from stream first */
988 hr = readerinput_growraw(readerinput);
989 len = readerinput_get_convlen(readerinput);
991 /* just copy for UTF-16 case */
992 if (cp == ~0)
994 readerinput_grow(readerinput, len);
995 memcpy(dest->data + dest->written, src->data + src->cur, len);
996 dest->written += len*sizeof(WCHAR);
997 return hr;
1000 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1001 readerinput_grow(readerinput, dest_len);
1002 ptr = (WCHAR*)(dest->data + dest->written);
1003 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1004 ptr[dest_len] = 0;
1005 dest->written += dest_len*sizeof(WCHAR);
1006 /* get rid of processed data */
1007 readerinput_shrinkraw(readerinput, len);
1009 return hr;
1012 static inline UINT reader_get_cur(xmlreader *reader)
1014 return reader->input->buffer->utf16.cur;
1017 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1019 encoded_buffer *buffer = &reader->input->buffer->utf16;
1020 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1021 if (!*ptr) reader_more(reader);
1022 return (WCHAR*)buffer->data + buffer->cur;
1025 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1027 int i=0;
1028 const WCHAR *ptr = reader_get_ptr(reader);
1029 while (str[i])
1031 if (!ptr[i])
1033 reader_more(reader);
1034 ptr = reader_get_ptr(reader);
1036 if (str[i] != ptr[i])
1037 return ptr[i] - str[i];
1038 i++;
1040 return 0;
1043 /* moves cursor n WCHARs forward */
1044 static void reader_skipn(xmlreader *reader, int n)
1046 encoded_buffer *buffer = &reader->input->buffer->utf16;
1047 const WCHAR *ptr = reader_get_ptr(reader);
1049 while (*ptr++ && n--)
1051 buffer->cur++;
1052 reader->pos++;
1056 static inline BOOL is_wchar_space(WCHAR ch)
1058 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1061 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1062 static int reader_skipspaces(xmlreader *reader)
1064 encoded_buffer *buffer = &reader->input->buffer->utf16;
1065 const WCHAR *ptr = reader_get_ptr(reader);
1066 UINT start = reader_get_cur(reader);
1068 while (is_wchar_space(*ptr))
1070 if (*ptr == '\r')
1071 reader->pos = 0;
1072 else if (*ptr == '\n')
1074 reader->line++;
1075 reader->pos = 0;
1077 else
1078 reader->pos++;
1080 buffer->cur++;
1081 ptr = reader_get_ptr(reader);
1084 return reader_get_cur(reader) - start;
1087 /* [26] VersionNum ::= '1.' [0-9]+ */
1088 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1090 static const WCHAR onedotW[] = {'1','.',0};
1091 WCHAR *ptr, *ptr2;
1092 UINT start;
1094 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1096 start = reader_get_cur(reader);
1097 /* skip "1." */
1098 reader_skipn(reader, 2);
1100 ptr2 = ptr = reader_get_ptr(reader);
1101 while (*ptr >= '0' && *ptr <= '9')
1103 reader_skipn(reader, 1);
1104 ptr = reader_get_ptr(reader);
1107 if (ptr2 == ptr) return WC_E_DIGIT;
1108 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1109 TRACE("version=%s\n", debug_strval(reader, val));
1110 return S_OK;
1113 /* [25] Eq ::= S? '=' S? */
1114 static HRESULT reader_parse_eq(xmlreader *reader)
1116 static const WCHAR eqW[] = {'=',0};
1117 reader_skipspaces(reader);
1118 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1119 /* skip '=' */
1120 reader_skipn(reader, 1);
1121 reader_skipspaces(reader);
1122 return S_OK;
1125 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1126 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1128 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1129 strval val, name;
1130 HRESULT hr;
1132 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1134 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1135 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1136 /* skip 'version' */
1137 reader_skipn(reader, 7);
1139 hr = reader_parse_eq(reader);
1140 if (FAILED(hr)) return hr;
1142 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1143 return WC_E_QUOTE;
1144 /* skip "'"|'"' */
1145 reader_skipn(reader, 1);
1147 hr = reader_parse_versionnum(reader, &val);
1148 if (FAILED(hr)) return hr;
1150 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1151 return WC_E_QUOTE;
1153 /* skip "'"|'"' */
1154 reader_skipn(reader, 1);
1156 return reader_add_attr(reader, NULL, &name, &val);
1159 /* ([A-Za-z0-9._] | '-') */
1160 static inline BOOL is_wchar_encname(WCHAR ch)
1162 return ((ch >= 'A' && ch <= 'Z') ||
1163 (ch >= 'a' && ch <= 'z') ||
1164 (ch >= '0' && ch <= '9') ||
1165 (ch == '.') || (ch == '_') ||
1166 (ch == '-'));
1169 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1170 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1172 WCHAR *start = reader_get_ptr(reader), *ptr;
1173 xml_encoding enc;
1174 int len;
1176 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1177 return WC_E_ENCNAME;
1179 val->start = reader_get_cur(reader);
1181 ptr = start;
1182 while (is_wchar_encname(*++ptr))
1185 len = ptr - start;
1186 enc = parse_encoding_name(start, len);
1187 TRACE("encoding name %s\n", debugstr_wn(start, len));
1188 val->str = start;
1189 val->len = len;
1191 if (enc == XmlEncoding_Unknown)
1192 return WC_E_ENCNAME;
1194 /* skip encoding name */
1195 reader_skipn(reader, len);
1196 return S_OK;
1199 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1200 static HRESULT reader_parse_encdecl(xmlreader *reader)
1202 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1203 strval name, val;
1204 HRESULT hr;
1206 if (!reader_skipspaces(reader)) return S_FALSE;
1208 if (reader_cmp(reader, encodingW)) return S_FALSE;
1209 name.str = reader_get_ptr(reader);
1210 name.start = reader_get_cur(reader);
1211 name.len = 8;
1212 /* skip 'encoding' */
1213 reader_skipn(reader, 8);
1215 hr = reader_parse_eq(reader);
1216 if (FAILED(hr)) return hr;
1218 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1219 return WC_E_QUOTE;
1220 /* skip "'"|'"' */
1221 reader_skipn(reader, 1);
1223 hr = reader_parse_encname(reader, &val);
1224 if (FAILED(hr)) return hr;
1226 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1227 return WC_E_QUOTE;
1229 /* skip "'"|'"' */
1230 reader_skipn(reader, 1);
1232 return reader_add_attr(reader, NULL, &name, &val);
1235 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1236 static HRESULT reader_parse_sddecl(xmlreader *reader)
1238 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1239 static const WCHAR yesW[] = {'y','e','s',0};
1240 static const WCHAR noW[] = {'n','o',0};
1241 strval name, val;
1242 UINT start;
1243 HRESULT hr;
1245 if (!reader_skipspaces(reader)) return S_FALSE;
1247 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1248 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1249 /* skip 'standalone' */
1250 reader_skipn(reader, 10);
1252 hr = reader_parse_eq(reader);
1253 if (FAILED(hr)) return hr;
1255 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1256 return WC_E_QUOTE;
1257 /* skip "'"|'"' */
1258 reader_skipn(reader, 1);
1260 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1261 return WC_E_XMLDECL;
1263 start = reader_get_cur(reader);
1264 /* skip 'yes'|'no' */
1265 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1266 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1267 TRACE("standalone=%s\n", debug_strval(reader, &val));
1269 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1270 return WC_E_QUOTE;
1271 /* skip "'"|'"' */
1272 reader_skipn(reader, 1);
1274 return reader_add_attr(reader, NULL, &name, &val);
1277 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1278 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1280 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1281 static const WCHAR declcloseW[] = {'?','>',0};
1282 HRESULT hr;
1284 /* check if we have "<?xml " */
1285 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1287 reader_skipn(reader, 5);
1288 hr = reader_parse_versioninfo(reader);
1289 if (FAILED(hr))
1290 return hr;
1292 hr = reader_parse_encdecl(reader);
1293 if (FAILED(hr))
1294 return hr;
1296 hr = reader_parse_sddecl(reader);
1297 if (FAILED(hr))
1298 return hr;
1300 reader_skipspaces(reader);
1301 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1302 reader_skipn(reader, 2);
1304 reader_inc_depth(reader);
1305 reader->nodetype = XmlNodeType_XmlDeclaration;
1306 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1307 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1308 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1310 return S_OK;
1313 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1314 static HRESULT reader_parse_comment(xmlreader *reader)
1316 WCHAR *ptr;
1317 UINT start;
1319 if (reader->resumestate == XmlReadResumeState_Comment)
1321 start = reader->resume[XmlReadResume_Body];
1322 ptr = reader_get_ptr(reader);
1324 else
1326 /* skip '<!--' */
1327 reader_skipn(reader, 4);
1328 reader_shrink(reader);
1329 ptr = reader_get_ptr(reader);
1330 start = reader_get_cur(reader);
1331 reader->nodetype = XmlNodeType_Comment;
1332 reader->resume[XmlReadResume_Body] = start;
1333 reader->resumestate = XmlReadResumeState_Comment;
1334 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1335 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1336 reader_set_strvalue(reader, StringValue_Value, NULL);
1339 /* will exit when there's no more data, it won't attempt to
1340 read more from stream */
1341 while (*ptr)
1343 if (ptr[0] == '-')
1345 if (ptr[1] == '-')
1347 if (ptr[2] == '>')
1349 strval value;
1351 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1352 TRACE("%s\n", debug_strval(reader, &value));
1354 /* skip rest of markup '->' */
1355 reader_skipn(reader, 3);
1357 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1358 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1359 reader_set_strvalue(reader, StringValue_Value, &value);
1360 reader->resume[XmlReadResume_Body] = 0;
1361 reader->resumestate = XmlReadResumeState_Initial;
1362 return S_OK;
1364 else
1365 return WC_E_COMMENT;
1369 reader_skipn(reader, 1);
1370 ptr++;
1373 return S_OK;
1376 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1377 static inline BOOL is_char(WCHAR ch)
1379 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1380 (ch >= 0x20 && ch <= 0xd7ff) ||
1381 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1382 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1383 (ch >= 0xe000 && ch <= 0xfffd);
1386 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1387 static inline BOOL is_pubchar(WCHAR ch)
1389 return (ch == ' ') ||
1390 (ch >= 'a' && ch <= 'z') ||
1391 (ch >= 'A' && ch <= 'Z') ||
1392 (ch >= '0' && ch <= '9') ||
1393 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1394 (ch == '=') || (ch == '?') ||
1395 (ch == '@') || (ch == '!') ||
1396 (ch >= '#' && ch <= '%') || /* #$% */
1397 (ch == '_') || (ch == '\r') || (ch == '\n');
1400 static inline BOOL is_namestartchar(WCHAR ch)
1402 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1403 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1404 (ch >= 0xc0 && ch <= 0xd6) ||
1405 (ch >= 0xd8 && ch <= 0xf6) ||
1406 (ch >= 0xf8 && ch <= 0x2ff) ||
1407 (ch >= 0x370 && ch <= 0x37d) ||
1408 (ch >= 0x37f && ch <= 0x1fff) ||
1409 (ch >= 0x200c && ch <= 0x200d) ||
1410 (ch >= 0x2070 && ch <= 0x218f) ||
1411 (ch >= 0x2c00 && ch <= 0x2fef) ||
1412 (ch >= 0x3001 && ch <= 0xd7ff) ||
1413 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1414 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1415 (ch >= 0xf900 && ch <= 0xfdcf) ||
1416 (ch >= 0xfdf0 && ch <= 0xfffd);
1419 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1420 static inline BOOL is_ncnamechar(WCHAR ch)
1422 return (ch >= 'A' && ch <= 'Z') ||
1423 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1424 (ch == '-') || (ch == '.') ||
1425 (ch >= '0' && ch <= '9') ||
1426 (ch == 0xb7) ||
1427 (ch >= 0xc0 && ch <= 0xd6) ||
1428 (ch >= 0xd8 && ch <= 0xf6) ||
1429 (ch >= 0xf8 && ch <= 0x2ff) ||
1430 (ch >= 0x300 && ch <= 0x36f) ||
1431 (ch >= 0x370 && ch <= 0x37d) ||
1432 (ch >= 0x37f && ch <= 0x1fff) ||
1433 (ch >= 0x200c && ch <= 0x200d) ||
1434 (ch >= 0x203f && ch <= 0x2040) ||
1435 (ch >= 0x2070 && ch <= 0x218f) ||
1436 (ch >= 0x2c00 && ch <= 0x2fef) ||
1437 (ch >= 0x3001 && ch <= 0xd7ff) ||
1438 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1439 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1440 (ch >= 0xf900 && ch <= 0xfdcf) ||
1441 (ch >= 0xfdf0 && ch <= 0xfffd);
1444 static inline BOOL is_namechar(WCHAR ch)
1446 return (ch == ':') || is_ncnamechar(ch);
1449 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1451 /* When we're on attribute always return attribute type, container node type is kept.
1452 Note that container is not necessarily an element, and attribute doesn't mean it's
1453 an attribute in XML spec terms. */
1454 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1457 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1458 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1459 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1460 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1461 [5] Name ::= NameStartChar (NameChar)* */
1462 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1464 WCHAR *ptr;
1465 UINT start;
1467 if (reader->resume[XmlReadResume_Name])
1469 start = reader->resume[XmlReadResume_Name];
1470 ptr = reader_get_ptr(reader);
1472 else
1474 ptr = reader_get_ptr(reader);
1475 start = reader_get_cur(reader);
1476 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1479 while (is_namechar(*ptr))
1481 reader_skipn(reader, 1);
1482 ptr = reader_get_ptr(reader);
1485 if (is_reader_pending(reader))
1487 reader->resume[XmlReadResume_Name] = start;
1488 return E_PENDING;
1490 else
1491 reader->resume[XmlReadResume_Name] = 0;
1493 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1494 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1496 return S_OK;
1499 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1500 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1502 static const WCHAR xmlW[] = {'x','m','l'};
1503 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1504 strval name;
1505 WCHAR *ptr;
1506 HRESULT hr;
1507 UINT i;
1509 hr = reader_parse_name(reader, &name);
1510 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1512 /* now that we got name check for illegal content */
1513 if (strval_eq(reader, &name, &xmlval))
1514 return WC_E_LEADINGXML;
1516 /* PITarget can't be a qualified name */
1517 ptr = reader_get_strptr(reader, &name);
1518 for (i = 0; i < name.len; i++)
1519 if (ptr[i] == ':')
1520 return i ? NC_E_NAMECOLON : WC_E_PI;
1522 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1523 *target = name;
1524 return S_OK;
1527 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1528 static HRESULT reader_parse_pi(xmlreader *reader)
1530 strval target;
1531 WCHAR *ptr;
1532 UINT start;
1533 HRESULT hr;
1535 switch (reader->resumestate)
1537 case XmlReadResumeState_Initial:
1538 /* skip '<?' */
1539 reader_skipn(reader, 2);
1540 reader_shrink(reader);
1541 reader->resumestate = XmlReadResumeState_PITarget;
1542 case XmlReadResumeState_PITarget:
1543 hr = reader_parse_pitarget(reader, &target);
1544 if (FAILED(hr)) return hr;
1545 reader_set_strvalue(reader, StringValue_LocalName, &target);
1546 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1547 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1548 reader->resumestate = XmlReadResumeState_PIBody;
1549 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1550 default:
1554 start = reader->resume[XmlReadResume_Body];
1555 ptr = reader_get_ptr(reader);
1556 while (*ptr)
1558 if (ptr[0] == '?')
1560 if (ptr[1] == '>')
1562 UINT cur = reader_get_cur(reader);
1563 strval value;
1565 /* strip all leading whitespace chars */
1566 while (start < cur)
1568 ptr = reader_get_ptr2(reader, start);
1569 if (!is_wchar_space(*ptr)) break;
1570 start++;
1573 reader_init_strvalue(start, cur-start, &value);
1575 /* skip '?>' */
1576 reader_skipn(reader, 2);
1577 TRACE("%s\n", debug_strval(reader, &value));
1578 reader->nodetype = XmlNodeType_ProcessingInstruction;
1579 reader->resumestate = XmlReadResumeState_Initial;
1580 reader->resume[XmlReadResume_Body] = 0;
1581 reader_set_strvalue(reader, StringValue_Value, &value);
1582 return S_OK;
1586 reader_skipn(reader, 1);
1587 ptr = reader_get_ptr(reader);
1590 return S_OK;
1593 /* This one is used to parse significant whitespace nodes, like in Misc production */
1594 static HRESULT reader_parse_whitespace(xmlreader *reader)
1596 switch (reader->resumestate)
1598 case XmlReadResumeState_Initial:
1599 reader_shrink(reader);
1600 reader->resumestate = XmlReadResumeState_Whitespace;
1601 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1602 reader->nodetype = XmlNodeType_Whitespace;
1603 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1604 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1605 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1606 /* fallthrough */
1607 case XmlReadResumeState_Whitespace:
1609 strval value;
1610 UINT start;
1612 reader_skipspaces(reader);
1613 if (is_reader_pending(reader)) return S_OK;
1615 start = reader->resume[XmlReadResume_Body];
1616 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1617 reader_set_strvalue(reader, StringValue_Value, &value);
1618 TRACE("%s\n", debug_strval(reader, &value));
1619 reader->resumestate = XmlReadResumeState_Initial;
1621 default:
1625 return S_OK;
1628 /* [27] Misc ::= Comment | PI | S */
1629 static HRESULT reader_parse_misc(xmlreader *reader)
1631 HRESULT hr = S_FALSE;
1633 if (reader->resumestate != XmlReadResumeState_Initial)
1635 hr = reader_more(reader);
1636 if (FAILED(hr)) return hr;
1638 /* finish current node */
1639 switch (reader->resumestate)
1641 case XmlReadResumeState_PITarget:
1642 case XmlReadResumeState_PIBody:
1643 return reader_parse_pi(reader);
1644 case XmlReadResumeState_Comment:
1645 return reader_parse_comment(reader);
1646 case XmlReadResumeState_Whitespace:
1647 return reader_parse_whitespace(reader);
1648 default:
1649 ERR("unknown resume state %d\n", reader->resumestate);
1653 while (1)
1655 const WCHAR *cur = reader_get_ptr(reader);
1657 if (is_wchar_space(*cur))
1658 hr = reader_parse_whitespace(reader);
1659 else if (!reader_cmp(reader, commentW))
1660 hr = reader_parse_comment(reader);
1661 else if (!reader_cmp(reader, piW))
1662 hr = reader_parse_pi(reader);
1663 else
1664 break;
1666 if (hr != S_FALSE) return hr;
1669 return hr;
1672 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1673 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1675 WCHAR *cur = reader_get_ptr(reader), quote;
1676 UINT start;
1678 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1680 quote = *cur;
1681 reader_skipn(reader, 1);
1683 cur = reader_get_ptr(reader);
1684 start = reader_get_cur(reader);
1685 while (is_char(*cur) && *cur != quote)
1687 reader_skipn(reader, 1);
1688 cur = reader_get_ptr(reader);
1690 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1691 if (*cur == quote) reader_skipn(reader, 1);
1693 TRACE("%s\n", debug_strval(reader, literal));
1694 return S_OK;
1697 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1698 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1699 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1701 WCHAR *cur = reader_get_ptr(reader), quote;
1702 UINT start;
1704 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1706 quote = *cur;
1707 reader_skipn(reader, 1);
1709 start = reader_get_cur(reader);
1710 cur = reader_get_ptr(reader);
1711 while (is_pubchar(*cur) && *cur != quote)
1713 reader_skipn(reader, 1);
1714 cur = reader_get_ptr(reader);
1716 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1717 if (*cur == quote) reader_skipn(reader, 1);
1719 TRACE("%s\n", debug_strval(reader, literal));
1720 return S_OK;
1723 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1724 static HRESULT reader_parse_externalid(xmlreader *reader)
1726 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1727 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1728 strval name, sys;
1729 HRESULT hr;
1730 int cnt;
1732 if (!reader_cmp(reader, publicW)) {
1733 strval pub;
1735 /* public id */
1736 reader_skipn(reader, 6);
1737 cnt = reader_skipspaces(reader);
1738 if (!cnt) return WC_E_WHITESPACE;
1740 hr = reader_parse_pub_literal(reader, &pub);
1741 if (FAILED(hr)) return hr;
1743 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1744 hr = reader_add_attr(reader, NULL, &name, &pub);
1745 if (FAILED(hr)) return hr;
1747 cnt = reader_skipspaces(reader);
1748 if (!cnt) return S_OK;
1750 /* optional system id */
1751 hr = reader_parse_sys_literal(reader, &sys);
1752 if (FAILED(hr)) return S_OK;
1754 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1755 hr = reader_add_attr(reader, NULL, &name, &sys);
1756 if (FAILED(hr)) return hr;
1758 return S_OK;
1759 } else if (!reader_cmp(reader, systemW)) {
1760 /* system id */
1761 reader_skipn(reader, 6);
1762 cnt = reader_skipspaces(reader);
1763 if (!cnt) return WC_E_WHITESPACE;
1765 hr = reader_parse_sys_literal(reader, &sys);
1766 if (FAILED(hr)) return hr;
1768 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1769 return reader_add_attr(reader, NULL, &name, &sys);
1772 return S_FALSE;
1775 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1776 static HRESULT reader_parse_dtd(xmlreader *reader)
1778 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1779 strval name;
1780 WCHAR *cur;
1781 HRESULT hr;
1783 /* check if we have "<!DOCTYPE" */
1784 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1785 reader_shrink(reader);
1787 /* DTD processing is not allowed by default */
1788 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1790 reader_skipn(reader, 9);
1791 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1793 /* name */
1794 hr = reader_parse_name(reader, &name);
1795 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1797 reader_skipspaces(reader);
1799 hr = reader_parse_externalid(reader);
1800 if (FAILED(hr)) return hr;
1802 reader_skipspaces(reader);
1804 cur = reader_get_ptr(reader);
1805 if (*cur != '>')
1807 FIXME("internal subset parsing not implemented\n");
1808 return E_NOTIMPL;
1811 /* skip '>' */
1812 reader_skipn(reader, 1);
1814 reader->nodetype = XmlNodeType_DocumentType;
1815 reader_set_strvalue(reader, StringValue_LocalName, &name);
1816 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1818 return S_OK;
1821 /* [11 NS] LocalPart ::= NCName */
1822 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1824 WCHAR *ptr;
1825 UINT start;
1827 if (reader->resume[XmlReadResume_Local])
1829 start = reader->resume[XmlReadResume_Local];
1830 ptr = reader_get_ptr(reader);
1832 else
1834 ptr = reader_get_ptr(reader);
1835 start = reader_get_cur(reader);
1838 while (is_ncnamechar(*ptr))
1840 reader_skipn(reader, 1);
1841 ptr = reader_get_ptr(reader);
1844 if (is_reader_pending(reader))
1846 reader->resume[XmlReadResume_Local] = start;
1847 return E_PENDING;
1849 else
1850 reader->resume[XmlReadResume_Local] = 0;
1852 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1854 return S_OK;
1857 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1858 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1859 [9 NS] UnprefixedName ::= LocalPart
1860 [10 NS] Prefix ::= NCName */
1861 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1863 WCHAR *ptr;
1864 UINT start;
1865 HRESULT hr;
1867 if (reader->resume[XmlReadResume_Name])
1869 start = reader->resume[XmlReadResume_Name];
1870 ptr = reader_get_ptr(reader);
1872 else
1874 ptr = reader_get_ptr(reader);
1875 start = reader_get_cur(reader);
1876 reader->resume[XmlReadResume_Name] = start;
1877 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1880 if (reader->resume[XmlReadResume_Local])
1882 hr = reader_parse_local(reader, local);
1883 if (FAILED(hr)) return hr;
1885 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1886 local->start - reader->resume[XmlReadResume_Name] - 1,
1887 prefix);
1889 else
1891 /* skip prefix part */
1892 while (is_ncnamechar(*ptr))
1894 reader_skipn(reader, 1);
1895 ptr = reader_get_ptr(reader);
1898 if (is_reader_pending(reader)) return E_PENDING;
1900 /* got a qualified name */
1901 if (*ptr == ':')
1903 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1905 /* skip ':' */
1906 reader_skipn(reader, 1);
1907 hr = reader_parse_local(reader, local);
1908 if (FAILED(hr)) return hr;
1910 else
1912 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1913 reader_init_strvalue(0, 0, prefix);
1917 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1919 if (prefix->len)
1920 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1921 else
1922 TRACE("ncname %s\n", debug_strval(reader, local));
1924 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1925 /* count ':' too */
1926 (prefix->len ? prefix->len + 1 : 0) + local->len,
1927 qname);
1929 reader->resume[XmlReadResume_Name] = 0;
1930 reader->resume[XmlReadResume_Local] = 0;
1932 return S_OK;
1935 /* Applies normalization rules to a single char, used for attribute values.
1937 Rules include 2 steps:
1939 1) replacing \r\n with a single \n;
1940 2) replacing all whitespace chars with ' '.
1943 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1945 encoded_buffer *buffer = &reader->input->buffer->utf16;
1947 if (!is_wchar_space(*ptr)) return;
1949 if (*ptr == '\r' && *(ptr+1) == '\n')
1951 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1952 memmove(ptr+1, ptr+2, len);
1954 *ptr = ' ';
1957 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1959 static const WCHAR entltW[] = {'l','t'};
1960 static const WCHAR entgtW[] = {'g','t'};
1961 static const WCHAR entampW[] = {'a','m','p'};
1962 static const WCHAR entaposW[] = {'a','p','o','s'};
1963 static const WCHAR entquotW[] = {'q','u','o','t'};
1964 static const strval lt = { (WCHAR*)entltW, 2 };
1965 static const strval gt = { (WCHAR*)entgtW, 2 };
1966 static const strval amp = { (WCHAR*)entampW, 3 };
1967 static const strval apos = { (WCHAR*)entaposW, 4 };
1968 static const strval quot = { (WCHAR*)entquotW, 4 };
1969 WCHAR *str = reader_get_strptr(reader, name);
1971 switch (*str)
1973 case 'l':
1974 if (strval_eq(reader, name, &lt)) return '<';
1975 break;
1976 case 'g':
1977 if (strval_eq(reader, name, &gt)) return '>';
1978 break;
1979 case 'a':
1980 if (strval_eq(reader, name, &amp))
1981 return '&';
1982 else if (strval_eq(reader, name, &apos))
1983 return '\'';
1984 break;
1985 case 'q':
1986 if (strval_eq(reader, name, &quot)) return '\"';
1987 break;
1988 default:
1992 return 0;
1995 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1996 [67] Reference ::= EntityRef | CharRef
1997 [68] EntityRef ::= '&' Name ';' */
1998 static HRESULT reader_parse_reference(xmlreader *reader)
2000 encoded_buffer *buffer = &reader->input->buffer->utf16;
2001 WCHAR *start = reader_get_ptr(reader), *ptr;
2002 UINT cur = reader_get_cur(reader);
2003 WCHAR ch = 0;
2004 int len;
2006 /* skip '&' */
2007 reader_skipn(reader, 1);
2008 ptr = reader_get_ptr(reader);
2010 if (*ptr == '#')
2012 reader_skipn(reader, 1);
2013 ptr = reader_get_ptr(reader);
2015 /* hex char or decimal */
2016 if (*ptr == 'x')
2018 reader_skipn(reader, 1);
2019 ptr = reader_get_ptr(reader);
2021 while (*ptr != ';')
2023 if ((*ptr >= '0' && *ptr <= '9'))
2024 ch = ch*16 + *ptr - '0';
2025 else if ((*ptr >= 'a' && *ptr <= 'f'))
2026 ch = ch*16 + *ptr - 'a' + 10;
2027 else if ((*ptr >= 'A' && *ptr <= 'F'))
2028 ch = ch*16 + *ptr - 'A' + 10;
2029 else
2030 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2031 reader_skipn(reader, 1);
2032 ptr = reader_get_ptr(reader);
2035 else
2037 while (*ptr != ';')
2039 if ((*ptr >= '0' && *ptr <= '9'))
2041 ch = ch*10 + *ptr - '0';
2042 reader_skipn(reader, 1);
2043 ptr = reader_get_ptr(reader);
2045 else
2046 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2050 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2052 /* normalize */
2053 if (is_wchar_space(ch)) ch = ' ';
2055 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2056 memmove(start+1, ptr+1, len);
2057 buffer->cur = cur + 1;
2059 *start = ch;
2061 else
2063 strval name;
2064 HRESULT hr;
2066 hr = reader_parse_name(reader, &name);
2067 if (FAILED(hr)) return hr;
2069 ptr = reader_get_ptr(reader);
2070 if (*ptr != ';') return WC_E_SEMICOLON;
2072 /* predefined entities resolve to a single character */
2073 ch = get_predefined_entity(reader, &name);
2074 if (ch)
2076 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2077 memmove(start+1, ptr+1, len);
2078 buffer->cur = cur + 1;
2080 *start = ch;
2082 else
2084 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2085 return WC_E_UNDECLAREDENTITY;
2090 return S_OK;
2093 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2094 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2096 WCHAR *ptr, quote;
2097 UINT start;
2099 ptr = reader_get_ptr(reader);
2101 /* skip opening quote */
2102 quote = *ptr;
2103 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2104 reader_skipn(reader, 1);
2106 ptr = reader_get_ptr(reader);
2107 start = reader_get_cur(reader);
2108 while (*ptr)
2110 if (*ptr == '<') return WC_E_LESSTHAN;
2112 if (*ptr == quote)
2114 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2115 /* skip closing quote */
2116 reader_skipn(reader, 1);
2117 return S_OK;
2120 if (*ptr == '&')
2122 HRESULT hr = reader_parse_reference(reader);
2123 if (FAILED(hr)) return hr;
2125 else
2127 reader_normalize_space(reader, ptr);
2128 reader_skipn(reader, 1);
2130 ptr = reader_get_ptr(reader);
2133 return WC_E_QUOTE;
2136 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2137 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2138 [3 NS] DefaultAttName ::= 'xmlns'
2139 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2140 static HRESULT reader_parse_attribute(xmlreader *reader)
2142 strval prefix, local, qname, value;
2143 BOOL ns = FALSE, nsdef = FALSE;
2144 HRESULT hr;
2146 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2147 if (FAILED(hr)) return hr;
2149 if (strval_eq(reader, &prefix, &strval_xmlns))
2150 ns = TRUE;
2152 if (strval_eq(reader, &qname, &strval_xmlns))
2153 ns = nsdef = TRUE;
2155 hr = reader_parse_eq(reader);
2156 if (FAILED(hr)) return hr;
2158 hr = reader_parse_attvalue(reader, &value);
2159 if (FAILED(hr)) return hr;
2161 if (ns)
2162 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2164 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2165 return reader_add_attr(reader, &prefix, &local, &value);
2168 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2169 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2170 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2172 HRESULT hr;
2174 hr = reader_parse_qname(reader, prefix, local, qname);
2175 if (FAILED(hr)) return hr;
2177 while (1)
2179 static const WCHAR endW[] = {'/','>',0};
2181 reader_skipspaces(reader);
2183 /* empty element */
2184 if ((*empty = !reader_cmp(reader, endW)))
2186 /* skip '/>' */
2187 reader_skipn(reader, 2);
2188 reader->is_empty_element = TRUE;
2189 reader->empty_element.prefix = *prefix;
2190 reader->empty_element.localname = *local;
2191 reader->empty_element.qname = *qname;
2192 reader_mark_ns_nodes(reader, &reader->empty_element);
2193 return S_OK;
2196 /* got a start tag */
2197 if (!reader_cmp(reader, gtW))
2199 /* skip '>' */
2200 reader_skipn(reader, 1);
2201 return reader_push_element(reader, prefix, local, qname);
2204 hr = reader_parse_attribute(reader);
2205 if (FAILED(hr)) return hr;
2208 return S_OK;
2211 /* [39] element ::= EmptyElemTag | STag content ETag */
2212 static HRESULT reader_parse_element(xmlreader *reader)
2214 HRESULT hr;
2216 switch (reader->resumestate)
2218 case XmlReadResumeState_Initial:
2219 /* check if we are really on element */
2220 if (reader_cmp(reader, ltW)) return S_FALSE;
2222 /* skip '<' */
2223 reader_skipn(reader, 1);
2225 reader_shrink(reader);
2226 reader->resumestate = XmlReadResumeState_STag;
2227 case XmlReadResumeState_STag:
2229 strval qname, prefix, local;
2230 int empty = 0;
2232 /* this handles empty elements too */
2233 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2234 if (FAILED(hr)) return hr;
2236 /* FIXME: need to check for defined namespace to reject invalid prefix,
2237 currently reject all prefixes */
2238 if (prefix.len) return NC_E_UNDECLAREDPREFIX;
2240 /* if we got empty element and stack is empty go straight to Misc */
2241 if (empty && list_empty(&reader->elements))
2242 reader->instate = XmlReadInState_MiscEnd;
2243 else
2244 reader->instate = XmlReadInState_Content;
2246 reader->nodetype = XmlNodeType_Element;
2247 reader->resumestate = XmlReadResumeState_Initial;
2248 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2249 reader_set_strvalue(reader, StringValue_LocalName, &local);
2250 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2251 break;
2253 default:
2254 hr = E_FAIL;
2257 return hr;
2260 /* [13 NS] ETag ::= '</' QName S? '>' */
2261 static HRESULT reader_parse_endtag(xmlreader *reader)
2263 strval prefix, local, qname;
2264 struct element *elem;
2265 HRESULT hr;
2267 /* skip '</' */
2268 reader_skipn(reader, 2);
2270 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2271 if (FAILED(hr)) return hr;
2273 reader_skipspaces(reader);
2275 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2277 /* skip '>' */
2278 reader_skipn(reader, 1);
2280 /* Element stack should never be empty at this point, cause we shouldn't get to
2281 content parsing if it's empty. */
2282 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2283 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2285 reader->nodetype = XmlNodeType_EndElement;
2286 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2287 reader_set_strvalue(reader, StringValue_LocalName, &local);
2288 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2290 return S_OK;
2293 /* [18] CDSect ::= CDStart CData CDEnd
2294 [19] CDStart ::= '<![CDATA['
2295 [20] CData ::= (Char* - (Char* ']]>' Char*))
2296 [21] CDEnd ::= ']]>' */
2297 static HRESULT reader_parse_cdata(xmlreader *reader)
2299 WCHAR *ptr;
2300 UINT start;
2302 if (reader->resumestate == XmlReadResumeState_CDATA)
2304 start = reader->resume[XmlReadResume_Body];
2305 ptr = reader_get_ptr(reader);
2307 else
2309 /* skip markup '<![CDATA[' */
2310 reader_skipn(reader, 9);
2311 reader_shrink(reader);
2312 ptr = reader_get_ptr(reader);
2313 start = reader_get_cur(reader);
2314 reader->nodetype = XmlNodeType_CDATA;
2315 reader->resume[XmlReadResume_Body] = start;
2316 reader->resumestate = XmlReadResumeState_CDATA;
2317 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2318 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2319 reader_set_strvalue(reader, StringValue_Value, NULL);
2322 while (*ptr)
2324 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2326 strval value;
2328 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2330 /* skip ']]>' */
2331 reader_skipn(reader, 3);
2332 TRACE("%s\n", debug_strval(reader, &value));
2334 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2335 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2336 reader_set_strvalue(reader, StringValue_Value, &value);
2337 reader->resume[XmlReadResume_Body] = 0;
2338 reader->resumestate = XmlReadResumeState_Initial;
2339 return S_OK;
2341 else
2343 /* Value normalization is not fully implemented, rules are:
2345 - single '\r' -> '\n';
2346 - sequence '\r\n' -> '\n', in this case value length changes;
2348 if (*ptr == '\r') *ptr = '\n';
2349 reader_skipn(reader, 1);
2350 ptr++;
2354 return S_OK;
2357 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2358 static HRESULT reader_parse_chardata(xmlreader *reader)
2360 WCHAR *ptr;
2361 UINT start;
2363 if (reader->resumestate == XmlReadResumeState_CharData)
2365 start = reader->resume[XmlReadResume_Body];
2366 ptr = reader_get_ptr(reader);
2368 else
2370 reader_shrink(reader);
2371 ptr = reader_get_ptr(reader);
2372 start = reader_get_cur(reader);
2373 /* There's no text */
2374 if (!*ptr || *ptr == '<') return S_OK;
2375 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2376 reader->resume[XmlReadResume_Body] = start;
2377 reader->resumestate = XmlReadResumeState_CharData;
2378 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2379 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2380 reader_set_strvalue(reader, StringValue_Value, NULL);
2383 while (*ptr)
2385 /* CDATA closing sequence ']]>' is not allowed */
2386 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2387 return WC_E_CDSECTEND;
2389 /* Found next markup part */
2390 if (ptr[0] == '<')
2392 strval value;
2394 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2395 reader_set_strvalue(reader, StringValue_Value, &value);
2396 reader->resume[XmlReadResume_Body] = 0;
2397 reader->resumestate = XmlReadResumeState_Initial;
2398 return S_OK;
2401 reader_skipn(reader, 1);
2403 /* this covers a case when text has leading whitespace chars */
2404 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2405 ptr++;
2408 return S_OK;
2411 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2412 static HRESULT reader_parse_content(xmlreader *reader)
2414 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2415 static const WCHAR etagW[] = {'<','/',0};
2416 static const WCHAR ampW[] = {'&',0};
2418 if (reader->resumestate != XmlReadResumeState_Initial)
2420 switch (reader->resumestate)
2422 case XmlReadResumeState_CDATA:
2423 return reader_parse_cdata(reader);
2424 case XmlReadResumeState_Comment:
2425 return reader_parse_comment(reader);
2426 case XmlReadResumeState_PIBody:
2427 case XmlReadResumeState_PITarget:
2428 return reader_parse_pi(reader);
2429 case XmlReadResumeState_CharData:
2430 return reader_parse_chardata(reader);
2431 default:
2432 ERR("unknown resume state %d\n", reader->resumestate);
2436 reader_shrink(reader);
2438 /* handle end tag here, it indicates end of content as well */
2439 if (!reader_cmp(reader, etagW))
2440 return reader_parse_endtag(reader);
2442 if (!reader_cmp(reader, commentW))
2443 return reader_parse_comment(reader);
2445 if (!reader_cmp(reader, piW))
2446 return reader_parse_pi(reader);
2448 if (!reader_cmp(reader, cdstartW))
2449 return reader_parse_cdata(reader);
2451 if (!reader_cmp(reader, ampW))
2452 return reader_parse_reference(reader);
2454 if (!reader_cmp(reader, ltW))
2455 return reader_parse_element(reader);
2457 /* what's left must be CharData */
2458 return reader_parse_chardata(reader);
2461 static HRESULT reader_parse_nextnode(xmlreader *reader)
2463 XmlNodeType nodetype = reader_get_nodetype(reader);
2464 HRESULT hr;
2466 if (!is_reader_pending(reader))
2467 reader_clear_attrs(reader);
2469 /* When moving from EndElement or empty element, pop its own namespace defitions */
2470 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2471 reader_pop_ns_nodes(reader, &reader->empty_element);
2472 else if (nodetype == XmlNodeType_EndElement)
2473 reader_pop_element(reader);
2475 while (1)
2477 switch (reader->instate)
2479 /* if it's a first call for a new input we need to detect stream encoding */
2480 case XmlReadInState_Initial:
2482 xml_encoding enc;
2484 hr = readerinput_growraw(reader->input);
2485 if (FAILED(hr)) return hr;
2487 /* try to detect encoding by BOM or data and set input code page */
2488 hr = readerinput_detectencoding(reader->input, &enc);
2489 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2490 if (FAILED(hr)) return hr;
2492 /* always switch first time cause we have to put something in */
2493 readerinput_switchencoding(reader->input, enc);
2495 /* parse xml declaration */
2496 hr = reader_parse_xmldecl(reader);
2497 if (FAILED(hr)) return hr;
2499 readerinput_shrinkraw(reader->input, -1);
2500 reader->instate = XmlReadInState_Misc_DTD;
2501 if (hr == S_OK) return hr;
2503 break;
2504 case XmlReadInState_Misc_DTD:
2505 hr = reader_parse_misc(reader);
2506 if (FAILED(hr)) return hr;
2508 if (hr == S_FALSE)
2509 reader->instate = XmlReadInState_DTD;
2510 else
2511 return hr;
2512 break;
2513 case XmlReadInState_DTD:
2514 hr = reader_parse_dtd(reader);
2515 if (FAILED(hr)) return hr;
2517 if (hr == S_OK)
2519 reader->instate = XmlReadInState_DTD_Misc;
2520 return hr;
2522 else
2523 reader->instate = XmlReadInState_Element;
2524 break;
2525 case XmlReadInState_DTD_Misc:
2526 hr = reader_parse_misc(reader);
2527 if (FAILED(hr)) return hr;
2529 if (hr == S_FALSE)
2530 reader->instate = XmlReadInState_Element;
2531 else
2532 return hr;
2533 break;
2534 case XmlReadInState_Element:
2535 return reader_parse_element(reader);
2536 case XmlReadInState_Content:
2537 return reader_parse_content(reader);
2538 case XmlReadInState_MiscEnd:
2539 hr = reader_parse_misc(reader);
2540 if (FAILED(hr)) return hr;
2542 if (hr == S_FALSE)
2543 reader->instate = XmlReadInState_Eof;
2544 return hr;
2545 case XmlReadInState_Eof:
2546 return S_FALSE;
2547 default:
2548 FIXME("internal state %d not handled\n", reader->instate);
2549 return E_NOTIMPL;
2553 return E_NOTIMPL;
2556 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2558 xmlreader *This = impl_from_IXmlReader(iface);
2560 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2562 if (IsEqualGUID(riid, &IID_IUnknown) ||
2563 IsEqualGUID(riid, &IID_IXmlReader))
2565 *ppvObject = iface;
2567 else
2569 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2570 *ppvObject = NULL;
2571 return E_NOINTERFACE;
2574 IXmlReader_AddRef(iface);
2576 return S_OK;
2579 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2581 xmlreader *This = impl_from_IXmlReader(iface);
2582 ULONG ref = InterlockedIncrement(&This->ref);
2583 TRACE("(%p)->(%d)\n", This, ref);
2584 return ref;
2587 static void reader_clear_ns(xmlreader *reader)
2589 struct ns *ns, *ns2;
2591 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2592 reader_free_strvalued(reader, &ns->prefix);
2593 reader_free_strvalued(reader, &ns->uri);
2594 reader_free(reader, ns);
2597 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2598 reader_free_strvalued(reader, &ns->uri);
2599 reader_free(reader, ns);
2603 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2605 xmlreader *This = impl_from_IXmlReader(iface);
2606 LONG ref = InterlockedDecrement(&This->ref);
2608 TRACE("(%p)->(%d)\n", This, ref);
2610 if (ref == 0)
2612 IMalloc *imalloc = This->imalloc;
2613 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2614 if (This->resolver) IXmlResolver_Release(This->resolver);
2615 if (This->mlang) IUnknown_Release(This->mlang);
2616 reader_clear_attrs(This);
2617 reader_clear_ns(This);
2618 reader_clear_elements(This);
2619 reader_free_strvalues(This);
2620 reader_free(This, This);
2621 if (imalloc) IMalloc_Release(imalloc);
2624 return ref;
2627 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2629 xmlreader *This = impl_from_IXmlReader(iface);
2630 IXmlReaderInput *readerinput;
2631 HRESULT hr;
2633 TRACE("(%p)->(%p)\n", This, input);
2635 if (This->input)
2637 readerinput_release_stream(This->input);
2638 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2639 This->input = NULL;
2642 This->line = This->pos = 0;
2643 reader_clear_elements(This);
2644 This->depth = 0;
2645 This->resumestate = XmlReadResumeState_Initial;
2646 memset(This->resume, 0, sizeof(This->resume));
2648 /* just reset current input */
2649 if (!input)
2651 This->state = XmlReadState_Initial;
2652 return S_OK;
2655 /* now try IXmlReaderInput, ISequentialStream, IStream */
2656 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2657 if (hr == S_OK)
2659 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2660 This->input = impl_from_IXmlReaderInput(readerinput);
2661 else
2663 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2664 readerinput, readerinput->lpVtbl);
2665 IUnknown_Release(readerinput);
2666 return E_FAIL;
2671 if (hr != S_OK || !readerinput)
2673 /* create IXmlReaderInput basing on supplied interface */
2674 hr = CreateXmlReaderInputWithEncodingName(input,
2675 This->imalloc, NULL, FALSE, NULL, &readerinput);
2676 if (hr != S_OK) return hr;
2677 This->input = impl_from_IXmlReaderInput(readerinput);
2680 /* set stream for supplied IXmlReaderInput */
2681 hr = readerinput_query_for_stream(This->input);
2682 if (hr == S_OK)
2684 This->state = XmlReadState_Initial;
2685 This->instate = XmlReadInState_Initial;
2688 return hr;
2691 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2693 xmlreader *This = impl_from_IXmlReader(iface);
2695 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2697 if (!value) return E_INVALIDARG;
2699 switch (property)
2701 case XmlReaderProperty_MultiLanguage:
2702 *value = (LONG_PTR)This->mlang;
2703 if (This->mlang)
2704 IUnknown_AddRef(This->mlang);
2705 break;
2706 case XmlReaderProperty_XmlResolver:
2707 *value = (LONG_PTR)This->resolver;
2708 if (This->resolver)
2709 IXmlResolver_AddRef(This->resolver);
2710 break;
2711 case XmlReaderProperty_DtdProcessing:
2712 *value = This->dtdmode;
2713 break;
2714 case XmlReaderProperty_ReadState:
2715 *value = This->state;
2716 break;
2717 default:
2718 FIXME("Unimplemented property (%u)\n", property);
2719 return E_NOTIMPL;
2722 return S_OK;
2725 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2727 xmlreader *This = impl_from_IXmlReader(iface);
2729 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2731 switch (property)
2733 case XmlReaderProperty_MultiLanguage:
2734 if (This->mlang)
2735 IUnknown_Release(This->mlang);
2736 This->mlang = (IUnknown*)value;
2737 if (This->mlang)
2738 IUnknown_AddRef(This->mlang);
2739 if (This->mlang)
2740 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2741 break;
2742 case XmlReaderProperty_XmlResolver:
2743 if (This->resolver)
2744 IXmlResolver_Release(This->resolver);
2745 This->resolver = (IXmlResolver*)value;
2746 if (This->resolver)
2747 IXmlResolver_AddRef(This->resolver);
2748 break;
2749 case XmlReaderProperty_DtdProcessing:
2750 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2751 This->dtdmode = value;
2752 break;
2753 case XmlReaderProperty_MaxElementDepth:
2754 FIXME("Ignoring MaxElementDepth %ld\n", value);
2755 break;
2756 default:
2757 FIXME("Unimplemented property (%u)\n", property);
2758 return E_NOTIMPL;
2761 return S_OK;
2764 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2766 xmlreader *This = impl_from_IXmlReader(iface);
2767 XmlNodeType oldtype = This->nodetype;
2768 HRESULT hr;
2770 TRACE("(%p)->(%p)\n", This, nodetype);
2772 if (This->state == XmlReadState_Closed) return S_FALSE;
2774 hr = reader_parse_nextnode(This);
2775 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2776 This->state = XmlReadState_Interactive;
2777 if (hr == S_OK)
2779 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2780 *nodetype = This->nodetype;
2783 return hr;
2786 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2788 xmlreader *This = impl_from_IXmlReader(iface);
2789 TRACE("(%p)->(%p)\n", This, node_type);
2791 *node_type = reader_get_nodetype(This);
2792 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2795 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2797 if (!reader->attr_count)
2798 return S_FALSE;
2800 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2801 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2802 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2803 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2805 return S_OK;
2808 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2810 xmlreader *This = impl_from_IXmlReader(iface);
2812 TRACE("(%p)\n", This);
2814 return reader_move_to_first_attribute(This);
2817 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2819 xmlreader *This = impl_from_IXmlReader(iface);
2820 const struct list *next;
2822 TRACE("(%p)\n", This);
2824 if (!This->attr_count) return S_FALSE;
2826 if (!This->attr)
2827 return reader_move_to_first_attribute(This);
2829 next = list_next(&This->attrs, &This->attr->entry);
2830 if (next)
2832 This->attr = LIST_ENTRY(next, struct attribute, entry);
2833 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2834 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2835 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2838 return next ? S_OK : S_FALSE;
2841 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2842 LPCWSTR local_name,
2843 LPCWSTR namespaceUri)
2845 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2846 return E_NOTIMPL;
2849 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2851 xmlreader *This = impl_from_IXmlReader(iface);
2853 TRACE("(%p)\n", This);
2855 if (!This->attr_count) return S_FALSE;
2856 This->attr = NULL;
2858 /* FIXME: support other node types with 'attributes' like DTD */
2859 if (This->is_empty_element) {
2860 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2861 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2863 else {
2864 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2865 if (element) {
2866 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2867 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2871 return S_OK;
2874 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2876 xmlreader *This = impl_from_IXmlReader(iface);
2878 TRACE("(%p)->(%p %p)\n", This, name, len);
2879 *name = This->strvalues[StringValue_QualifiedName].str;
2880 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2881 return S_OK;
2884 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface,
2885 LPCWSTR *namespaceUri,
2886 UINT *namespaceUri_length)
2888 FIXME("(%p %p %p): stub\n", iface, namespaceUri, namespaceUri_length);
2889 return E_NOTIMPL;
2892 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2894 xmlreader *This = impl_from_IXmlReader(iface);
2896 TRACE("(%p)->(%p %p)\n", This, name, len);
2897 *name = This->strvalues[StringValue_LocalName].str;
2898 if (len) *len = This->strvalues[StringValue_LocalName].len;
2899 return S_OK;
2902 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2904 xmlreader *This = impl_from_IXmlReader(iface);
2906 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2907 *prefix = This->strvalues[StringValue_Prefix].str;
2908 if (len) *len = This->strvalues[StringValue_Prefix].len;
2909 return S_OK;
2912 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
2914 xmlreader *reader = impl_from_IXmlReader(iface);
2915 strval *val = &reader->strvalues[StringValue_Value];
2917 TRACE("(%p)->(%p %p)\n", reader, value, len);
2919 *value = NULL;
2921 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
2923 XmlNodeType type;
2924 HRESULT hr;
2926 hr = IXmlReader_Read(iface, &type);
2927 if (FAILED(hr)) return hr;
2929 /* return if still pending, partially read values are not reported */
2930 if (is_reader_pending(reader)) return E_PENDING;
2933 if (!val->str)
2935 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
2936 if (!ptr) return E_OUTOFMEMORY;
2937 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
2938 ptr[val->len] = 0;
2939 val->str = ptr;
2942 *value = val->str;
2943 if (len) *len = val->len;
2944 return S_OK;
2947 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
2949 xmlreader *reader = impl_from_IXmlReader(iface);
2950 strval *val = &reader->strvalues[StringValue_Value];
2951 UINT len;
2953 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
2955 /* Value is already allocated, chunked reads are not possible. */
2956 if (val->str) return S_FALSE;
2958 if (val->len)
2960 len = min(chunk_size, val->len);
2961 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
2962 val->start += len;
2963 val->len -= len;
2964 if (read) *read = len;
2967 return S_OK;
2970 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
2971 LPCWSTR *baseUri,
2972 UINT *baseUri_length)
2974 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
2975 return E_NOTIMPL;
2978 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
2980 FIXME("(%p): stub\n", iface);
2981 return FALSE;
2984 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
2986 xmlreader *This = impl_from_IXmlReader(iface);
2987 TRACE("(%p)\n", This);
2988 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2989 when current node is start tag of an element */
2990 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
2993 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
2995 xmlreader *This = impl_from_IXmlReader(iface);
2997 TRACE("(%p %p)\n", This, lineNumber);
2999 if (!lineNumber) return E_INVALIDARG;
3001 *lineNumber = This->line;
3003 return S_OK;
3006 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3008 xmlreader *This = impl_from_IXmlReader(iface);
3010 TRACE("(%p %p)\n", This, linePosition);
3012 if (!linePosition) return E_INVALIDARG;
3014 *linePosition = This->pos;
3016 return S_OK;
3019 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3021 xmlreader *This = impl_from_IXmlReader(iface);
3023 TRACE("(%p)->(%p)\n", This, count);
3025 if (!count) return E_INVALIDARG;
3027 *count = This->attr_count;
3028 return S_OK;
3031 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3033 xmlreader *This = impl_from_IXmlReader(iface);
3034 TRACE("(%p)->(%p)\n", This, depth);
3035 *depth = This->depth;
3036 return S_OK;
3039 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3041 FIXME("(%p): stub\n", iface);
3042 return FALSE;
3045 static const struct IXmlReaderVtbl xmlreader_vtbl =
3047 xmlreader_QueryInterface,
3048 xmlreader_AddRef,
3049 xmlreader_Release,
3050 xmlreader_SetInput,
3051 xmlreader_GetProperty,
3052 xmlreader_SetProperty,
3053 xmlreader_Read,
3054 xmlreader_GetNodeType,
3055 xmlreader_MoveToFirstAttribute,
3056 xmlreader_MoveToNextAttribute,
3057 xmlreader_MoveToAttributeByName,
3058 xmlreader_MoveToElement,
3059 xmlreader_GetQualifiedName,
3060 xmlreader_GetNamespaceUri,
3061 xmlreader_GetLocalName,
3062 xmlreader_GetPrefix,
3063 xmlreader_GetValue,
3064 xmlreader_ReadValueChunk,
3065 xmlreader_GetBaseUri,
3066 xmlreader_IsDefault,
3067 xmlreader_IsEmptyElement,
3068 xmlreader_GetLineNumber,
3069 xmlreader_GetLinePosition,
3070 xmlreader_GetAttributeCount,
3071 xmlreader_GetDepth,
3072 xmlreader_IsEOF
3075 /** IXmlReaderInput **/
3076 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3078 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3080 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3082 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3083 IsEqualGUID(riid, &IID_IUnknown))
3085 *ppvObject = iface;
3087 else
3089 WARN("interface %s not implemented\n", debugstr_guid(riid));
3090 *ppvObject = NULL;
3091 return E_NOINTERFACE;
3094 IUnknown_AddRef(iface);
3096 return S_OK;
3099 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3101 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3102 ULONG ref = InterlockedIncrement(&This->ref);
3103 TRACE("(%p)->(%d)\n", This, ref);
3104 return ref;
3107 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3109 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3110 LONG ref = InterlockedDecrement(&This->ref);
3112 TRACE("(%p)->(%d)\n", This, ref);
3114 if (ref == 0)
3116 IMalloc *imalloc = This->imalloc;
3117 if (This->input) IUnknown_Release(This->input);
3118 if (This->stream) ISequentialStream_Release(This->stream);
3119 if (This->buffer) free_input_buffer(This->buffer);
3120 readerinput_free(This, This->baseuri);
3121 readerinput_free(This, This);
3122 if (imalloc) IMalloc_Release(imalloc);
3125 return ref;
3128 static const struct IUnknownVtbl xmlreaderinputvtbl =
3130 xmlreaderinput_QueryInterface,
3131 xmlreaderinput_AddRef,
3132 xmlreaderinput_Release
3135 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3137 xmlreader *reader;
3138 int i;
3140 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3142 if (!IsEqualGUID(riid, &IID_IXmlReader))
3144 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3145 return E_FAIL;
3148 if (imalloc)
3149 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3150 else
3151 reader = heap_alloc(sizeof(*reader));
3152 if(!reader) return E_OUTOFMEMORY;
3154 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3155 reader->ref = 1;
3156 reader->input = NULL;
3157 reader->state = XmlReadState_Closed;
3158 reader->instate = XmlReadInState_Initial;
3159 reader->resumestate = XmlReadResumeState_Initial;
3160 reader->dtdmode = DtdProcessing_Prohibit;
3161 reader->resolver = NULL;
3162 reader->mlang = NULL;
3163 reader->line = reader->pos = 0;
3164 reader->imalloc = imalloc;
3165 if (imalloc) IMalloc_AddRef(imalloc);
3166 reader->nodetype = XmlNodeType_None;
3167 list_init(&reader->attrs);
3168 reader->attr_count = 0;
3169 reader->attr = NULL;
3170 list_init(&reader->nsdef);
3171 list_init(&reader->ns);
3172 list_init(&reader->elements);
3173 reader->depth = 0;
3174 reader->max_depth = 256;
3175 reader->is_empty_element = FALSE;
3176 memset(reader->resume, 0, sizeof(reader->resume));
3178 for (i = 0; i < StringValue_Last; i++)
3179 reader->strvalues[i] = strval_empty;
3181 *obj = &reader->IXmlReader_iface;
3183 TRACE("returning iface %p\n", *obj);
3185 return S_OK;
3188 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3189 IMalloc *imalloc,
3190 LPCWSTR encoding,
3191 BOOL hint,
3192 LPCWSTR base_uri,
3193 IXmlReaderInput **ppInput)
3195 xmlreaderinput *readerinput;
3196 HRESULT hr;
3198 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3199 hint, wine_dbgstr_w(base_uri), ppInput);
3201 if (!stream || !ppInput) return E_INVALIDARG;
3203 if (imalloc)
3204 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3205 else
3206 readerinput = heap_alloc(sizeof(*readerinput));
3207 if(!readerinput) return E_OUTOFMEMORY;
3209 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3210 readerinput->ref = 1;
3211 readerinput->imalloc = imalloc;
3212 readerinput->stream = NULL;
3213 if (imalloc) IMalloc_AddRef(imalloc);
3214 readerinput->encoding = parse_encoding_name(encoding, -1);
3215 readerinput->hint = hint;
3216 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3217 readerinput->pending = 0;
3219 hr = alloc_input_buffer(readerinput);
3220 if (hr != S_OK)
3222 readerinput_free(readerinput, readerinput->baseuri);
3223 readerinput_free(readerinput, readerinput);
3224 if (imalloc) IMalloc_Release(imalloc);
3225 return hr;
3227 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3229 *ppInput = &readerinput->IXmlReaderInput_iface;
3231 TRACE("returning iface %p\n", *ppInput);
3233 return S_OK;