xmllite: Drop superfluous casts to self.
[wine.git] / dlls / xmllite / reader.c
blob13d841eb94d52c4244ffb2f9dce663c7828090a5
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parsing states, kept in the reader's 'instate' field. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,  /* optional Misc after the root element was closed */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records the construct being parsed when an input problem interrupted
   parsing, so that the reader can resume from it. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into the reader's 'resume' array of saved input offsets. */
typedef enum
{
    XmlReadResume_Name,   /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local,  /* local part of a QName */
    XmlReadResume_Body,   /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last    /* number of slots, not a slot itself */
} XmlReaderResume;
/* Indices into the reader's 'strvalues' array of per-node strings. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last  /* number of slots, not a slot itself */
} XmlReaderStringValue;
86 BOOL is_namestartchar(WCHAR ch);
88 static const char *debugstr_nodetype(XmlNodeType nodetype)
90 static const char * const type_names[] =
92 "None",
93 "Element",
94 "Attribute",
95 "Text",
96 "CDATA",
97 "",
98 "",
99 "ProcessingInstruction",
100 "Comment",
102 "DocumentType",
105 "Whitespace",
107 "EndElement",
109 "XmlDeclaration"
112 if (nodetype > _XmlNodeType_Last)
113 return wine_dbg_sprintf("unknown type=%d", nodetype);
115 return type_names[nodetype];
118 static const char *debugstr_reader_prop(XmlReaderProperty prop)
120 static const char * const prop_names[] =
122 "MultiLanguage",
123 "ConformanceLevel",
124 "RandomAccess",
125 "XmlResolver",
126 "DtdProcessing",
127 "ReadState",
128 "MaxElementDepth",
129 "MaxEntityExpansion"
132 if (prop > _XmlReaderProperty_Last)
133 return wine_dbg_sprintf("unknown property=%d", prop);
135 return prop_names[prop];
138 struct xml_encoding_data
140 const WCHAR *name;
141 xml_encoding enc;
142 UINT cp;
145 static const struct xml_encoding_data xml_encoding_map[] =
147 { L"US-ASCII", XmlEncoding_USASCII, 20127 },
148 { L"UTF-16", XmlEncoding_UTF16, 1200 },
149 { L"UTF-8", XmlEncoding_UTF8, CP_UTF8 },
152 const WCHAR *get_encoding_name(xml_encoding encoding)
154 return xml_encoding_map[encoding].name;
157 xml_encoding get_encoding_from_codepage(UINT codepage)
159 int i;
160 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
162 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
164 return XmlEncoding_Unknown;
167 typedef struct
169 char *data;
170 UINT cur;
171 unsigned int allocated;
172 unsigned int written;
173 BOOL prev_cr;
174 } encoded_buffer;
176 typedef struct input_buffer input_buffer;
178 typedef struct
180 IXmlReaderInput IXmlReaderInput_iface;
181 LONG ref;
182 /* reference passed on IXmlReaderInput creation, is kept when input is created */
183 IUnknown *input;
184 IMalloc *imalloc;
185 xml_encoding encoding;
186 BOOL hint;
187 WCHAR *baseuri;
188 /* stream reference set after SetInput() call from reader,
189 stored as sequential stream, cause currently
190 optimizations possible with IStream aren't implemented */
191 ISequentialStream *stream;
192 input_buffer *buffer;
193 unsigned int pending : 1;
194 } xmlreaderinput;
196 static const struct IUnknownVtbl xmlreaderinputvtbl;
198 /* Structure to hold parsed string of specific length.
200 Reader stores node value as 'start' pointer, on request
201 a null-terminated version of it is allocated.
203 To init a strval variable use reader_init_strval(),
204 to set strval as a reader value use reader_set_strval().
206 typedef struct
208 WCHAR *str; /* allocated null-terminated string */
209 UINT len; /* length in WCHARs, altered after ReadValueChunk */
210 UINT start; /* input position where value starts */
211 } strval;
213 static WCHAR emptyW[] = L"";
214 static WCHAR xmlW[] = L"xml";
215 static WCHAR xmlnsW[] = L"xmlns";
216 static const strval strval_empty = { emptyW, 0 };
217 static const strval strval_xml = { xmlW, 3 };
218 static const strval strval_xmlns = { xmlnsW, 5 };
220 struct reader_position
222 UINT line_number;
223 UINT line_position;
/* Bit flags stored in struct attribute 'flags'. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION         = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
232 struct attribute
234 struct list entry;
235 strval prefix;
236 strval localname;
237 strval qname;
238 strval value;
239 struct reader_position position;
240 unsigned int flags;
243 struct element
245 struct list entry;
246 strval prefix;
247 strval localname;
248 strval qname;
249 struct reader_position position;
252 struct ns
254 struct list entry;
255 strval prefix;
256 strval uri;
257 struct element *element;
260 typedef struct
262 IXmlReader IXmlReader_iface;
263 LONG ref;
264 xmlreaderinput *input;
265 IMalloc *imalloc;
266 XmlReadState state;
267 HRESULT error; /* error set on XmlReadState_Error */
268 XmlReaderInternalState instate;
269 XmlReaderResumeState resumestate;
270 XmlNodeType nodetype;
271 DtdProcessing dtdmode;
272 IXmlResolver *resolver;
273 IUnknown *mlang;
274 struct reader_position position;
275 struct list attrs; /* attributes list for current node */
276 struct attribute *attr; /* current attribute */
277 UINT attr_count;
278 struct list nsdef;
279 struct list ns;
280 struct list elements;
281 int chunk_read_off;
282 strval strvalues[StringValue_Last];
283 UINT depth;
284 UINT max_depth;
285 BOOL is_empty_element;
286 struct element empty_element; /* used for empty elements without end tag <a />,
287 and to keep <?xml reader position */
288 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
289 } xmlreader;
291 struct input_buffer
293 encoded_buffer utf16;
294 encoded_buffer encoded;
295 UINT code_page;
296 xmlreaderinput *input;
299 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
301 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
304 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
306 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
309 /* reader memory allocation functions */
310 static inline void *reader_alloc(xmlreader *reader, size_t len)
312 return m_alloc(reader->imalloc, len);
315 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
317 void *ret = reader_alloc(reader, len);
318 if (ret)
319 memset(ret, 0, len);
320 return ret;
323 static inline void reader_free(xmlreader *reader, void *mem)
325 m_free(reader->imalloc, mem);
328 /* Just return pointer from offset, no attempt to read more. */
329 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
331 encoded_buffer *buffer = &reader->input->buffer->utf16;
332 return (WCHAR*)buffer->data + offset;
335 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
337 return v->str ? v->str : reader_get_ptr2(reader, v->start);
340 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
342 *dest = *src;
344 if (src->str != strval_empty.str)
346 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
347 if (!dest->str) return E_OUTOFMEMORY;
348 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
349 dest->str[dest->len] = 0;
350 dest->start = 0;
353 return S_OK;
356 /* reader input memory allocation functions */
357 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
359 return m_alloc(input->imalloc, len);
362 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
364 return m_realloc(input->imalloc, mem, len);
367 static inline void readerinput_free(xmlreaderinput *input, void *mem)
369 m_free(input->imalloc, mem);
372 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
374 LPWSTR ret = NULL;
376 if(str) {
377 DWORD size;
379 size = (lstrlenW(str)+1)*sizeof(WCHAR);
380 ret = readerinput_alloc(input, size);
381 if (ret) memcpy(ret, str, size);
384 return ret;
387 /* This one frees stored string value if needed */
388 static void reader_free_strvalued(xmlreader *reader, strval *v)
390 if (v->str != strval_empty.str)
392 reader_free(reader, v->str);
393 *v = strval_empty;
397 static void reader_clear_attrs(xmlreader *reader)
399 struct attribute *attr, *attr2;
400 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
402 reader_free_strvalued(reader, &attr->localname);
403 reader_free_strvalued(reader, &attr->value);
404 reader_free(reader, attr);
406 list_init(&reader->attrs);
407 reader->attr_count = 0;
408 reader->attr = NULL;
411 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
412 while we are on a node with attributes */
413 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
414 strval *value, const struct reader_position *position, unsigned int flags)
416 struct attribute *attr;
417 HRESULT hr;
419 attr = reader_alloc(reader, sizeof(*attr));
420 if (!attr) return E_OUTOFMEMORY;
422 hr = reader_strvaldup(reader, localname, &attr->localname);
423 if (hr == S_OK)
425 hr = reader_strvaldup(reader, value, &attr->value);
426 if (hr != S_OK)
427 reader_free_strvalued(reader, &attr->value);
429 if (hr != S_OK)
431 reader_free(reader, attr);
432 return hr;
435 if (prefix)
436 attr->prefix = *prefix;
437 else
438 memset(&attr->prefix, 0, sizeof(attr->prefix));
439 attr->qname = qname ? *qname : *localname;
440 attr->position = *position;
441 attr->flags = flags;
442 list_add_tail(&reader->attrs, &attr->entry);
443 reader->attr_count++;
445 return S_OK;
448 /* Returns current element, doesn't check if reader is actually positioned on it. */
449 static struct element *reader_get_element(xmlreader *reader)
451 if (reader->is_empty_element)
452 return &reader->empty_element;
454 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
457 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
459 v->start = start;
460 v->len = len;
461 v->str = NULL;
464 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
466 return debugstr_wn(reader_get_strptr(reader, v), v->len);
469 /* used to initialize from constant string */
470 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
472 v->start = 0;
473 v->len = len;
474 v->str = str;
477 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
479 reader_free_strvalued(reader, &reader->strvalues[type]);
482 static void reader_free_strvalues(xmlreader *reader)
484 int type;
485 for (type = 0; type < StringValue_Last; type++)
486 reader_free_strvalue(reader, type);
489 /* This helper should only be used to test if strings are the same,
490 it doesn't try to sort. */
491 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
493 if (str1->len != str2->len) return 0;
494 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
497 static void reader_clear_elements(xmlreader *reader)
499 struct element *elem, *elem2;
500 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
502 reader_free_strvalued(reader, &elem->prefix);
503 reader_free_strvalued(reader, &elem->localname);
504 reader_free_strvalued(reader, &elem->qname);
505 reader_free(reader, elem);
507 list_init(&reader->elements);
508 reader_free_strvalued(reader, &reader->empty_element.localname);
509 reader_free_strvalued(reader, &reader->empty_element.qname);
510 reader->is_empty_element = FALSE;
513 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
515 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
516 struct ns *ns;
518 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
519 if (strval_eq(reader, prefix, &ns->prefix))
520 return ns;
523 return NULL;
526 static HRESULT reader_inc_depth(xmlreader *reader)
528 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
531 static void reader_dec_depth(xmlreader *reader)
533 if (reader->depth)
534 reader->depth--;
537 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
539 struct ns *ns;
540 HRESULT hr;
542 ns = reader_alloc(reader, sizeof(*ns));
543 if (!ns) return E_OUTOFMEMORY;
545 if (def)
546 memset(&ns->prefix, 0, sizeof(ns->prefix));
547 else {
548 hr = reader_strvaldup(reader, prefix, &ns->prefix);
549 if (FAILED(hr)) {
550 reader_free(reader, ns);
551 return hr;
555 hr = reader_strvaldup(reader, uri, &ns->uri);
556 if (FAILED(hr)) {
557 reader_free_strvalued(reader, &ns->prefix);
558 reader_free(reader, ns);
559 return hr;
562 ns->element = NULL;
563 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
564 return hr;
567 static void reader_free_element(xmlreader *reader, struct element *element)
569 reader_free_strvalued(reader, &element->prefix);
570 reader_free_strvalued(reader, &element->localname);
571 reader_free_strvalued(reader, &element->qname);
572 reader_free(reader, element);
575 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
577 struct ns *ns;
579 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
580 if (ns->element)
581 break;
582 ns->element = element;
585 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
586 if (ns->element)
587 break;
588 ns->element = element;
592 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
593 strval *qname, const struct reader_position *position)
595 struct element *element;
596 HRESULT hr;
598 element = reader_alloc_zero(reader, sizeof(*element));
599 if (!element)
600 return E_OUTOFMEMORY;
602 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
603 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
604 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
606 list_add_head(&reader->elements, &element->entry);
607 reader_mark_ns_nodes(reader, element);
608 reader->is_empty_element = FALSE;
609 element->position = *position;
611 else
612 reader_free_element(reader, element);
614 return hr;
617 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
619 struct ns *ns, *ns2;
621 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
622 if (ns->element != element)
623 break;
625 list_remove(&ns->entry);
626 reader_free_strvalued(reader, &ns->prefix);
627 reader_free_strvalued(reader, &ns->uri);
628 reader_free(reader, ns);
631 if (!list_empty(&reader->nsdef)) {
632 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
633 if (ns->element == element) {
634 list_remove(&ns->entry);
635 reader_free_strvalued(reader, &ns->prefix);
636 reader_free_strvalued(reader, &ns->uri);
637 reader_free(reader, ns);
642 static void reader_pop_element(xmlreader *reader)
644 struct element *element;
646 if (list_empty(&reader->elements))
647 return;
649 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
650 list_remove(&element->entry);
652 reader_pop_ns_nodes(reader, element);
653 reader_free_element(reader, element);
655 /* It was a root element, the rest is expected as Misc */
656 if (list_empty(&reader->elements))
657 reader->instate = XmlReadInState_MiscEnd;
660 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
661 means node value is to be determined. */
662 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
664 strval *v = &reader->strvalues[type];
666 reader_free_strvalue(reader, type);
667 if (!value)
669 v->str = NULL;
670 v->start = 0;
671 v->len = 0;
672 return;
675 if (value->str == strval_empty.str)
676 *v = *value;
677 else
679 if (type == StringValue_Value)
681 /* defer allocation for value string */
682 v->str = NULL;
683 v->start = value->start;
684 v->len = value->len;
686 else
688 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
689 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
690 v->str[value->len] = 0;
691 v->len = value->len;
696 static inline int is_reader_pending(xmlreader *reader)
698 return reader->input->pending;
701 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
703 const int initial_len = 0x2000;
704 buffer->data = readerinput_alloc(input, initial_len);
705 if (!buffer->data) return E_OUTOFMEMORY;
707 memset(buffer->data, 0, 4);
708 buffer->cur = 0;
709 buffer->allocated = initial_len;
710 buffer->written = 0;
711 buffer->prev_cr = FALSE;
713 return S_OK;
716 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
718 readerinput_free(input, buffer->data);
721 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
723 if (encoding == XmlEncoding_Unknown)
725 FIXME("unsupported encoding %d\n", encoding);
726 return E_NOTIMPL;
729 *cp = xml_encoding_map[encoding].cp;
731 return S_OK;
734 xml_encoding parse_encoding_name(const WCHAR *name, int len)
736 int min, max, n, c;
738 if (!name) return XmlEncoding_Unknown;
740 min = 0;
741 max = ARRAY_SIZE(xml_encoding_map) - 1;
743 while (min <= max)
745 n = (min+max)/2;
747 if (len != -1)
748 c = wcsnicmp(xml_encoding_map[n].name, name, len);
749 else
750 c = wcsicmp(xml_encoding_map[n].name, name);
751 if (!c)
752 return xml_encoding_map[n].enc;
754 if (c > 0)
755 max = n-1;
756 else
757 min = n+1;
760 return XmlEncoding_Unknown;
763 static HRESULT alloc_input_buffer(xmlreaderinput *input)
765 input_buffer *buffer;
766 HRESULT hr;
768 input->buffer = NULL;
770 buffer = readerinput_alloc(input, sizeof(*buffer));
771 if (!buffer) return E_OUTOFMEMORY;
773 buffer->input = input;
774 buffer->code_page = ~0; /* code page is unknown at this point */
775 hr = init_encoded_buffer(input, &buffer->utf16);
776 if (hr != S_OK) {
777 readerinput_free(input, buffer);
778 return hr;
781 hr = init_encoded_buffer(input, &buffer->encoded);
782 if (hr != S_OK) {
783 free_encoded_buffer(input, &buffer->utf16);
784 readerinput_free(input, buffer);
785 return hr;
788 input->buffer = buffer;
789 return S_OK;
792 static void free_input_buffer(input_buffer *buffer)
794 free_encoded_buffer(buffer->input, &buffer->encoded);
795 free_encoded_buffer(buffer->input, &buffer->utf16);
796 readerinput_free(buffer->input, buffer);
799 static void readerinput_release_stream(xmlreaderinput *readerinput)
801 if (readerinput->stream) {
802 ISequentialStream_Release(readerinput->stream);
803 readerinput->stream = NULL;
807 /* Queries already stored interface for IStream/ISequentialStream.
808 Interface supplied on creation will be overwritten */
809 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
811 HRESULT hr;
813 readerinput_release_stream(readerinput);
814 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
815 if (hr != S_OK)
816 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
818 return hr;
821 /* reads a chunk to raw buffer */
822 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
824 encoded_buffer *buffer = &readerinput->buffer->encoded;
825 /* to make sure aligned length won't exceed allocated length */
826 ULONG len = buffer->allocated - buffer->written - 4;
827 ULONG read;
828 HRESULT hr;
830 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
831 variable width encodings like UTF-8 */
832 len = (len + 3) & ~3;
833 /* try to use allocated space or grow */
834 if (buffer->allocated - buffer->written < len)
836 buffer->allocated *= 2;
837 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
838 len = buffer->allocated - buffer->written;
841 read = 0;
842 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
843 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
844 readerinput->pending = hr == E_PENDING;
845 if (FAILED(hr)) return hr;
846 buffer->written += read;
848 return hr;
851 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
852 static void readerinput_grow(xmlreaderinput *readerinput, int length)
854 encoded_buffer *buffer = &readerinput->buffer->utf16;
856 length *= sizeof(WCHAR);
857 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
858 if (buffer->allocated < buffer->written + length + 4)
860 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
861 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
862 buffer->allocated = grown_size;
866 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
868 static const char startA[] = {'<','?'};
869 static const char commentA[] = {'<','!'};
870 encoded_buffer *buffer = &readerinput->buffer->encoded;
871 unsigned char *ptr = (unsigned char*)buffer->data;
873 return !memcmp(buffer->data, startA, sizeof(startA)) ||
874 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
875 /* test start byte */
876 (ptr[0] == '<' &&
878 (ptr[1] && (ptr[1] <= 0x7f)) ||
879 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
880 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
881 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
885 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
887 encoded_buffer *buffer = &readerinput->buffer->encoded;
888 static const char utf8bom[] = {0xef,0xbb,0xbf};
889 static const char utf16lebom[] = {0xff,0xfe};
890 WCHAR *ptrW;
892 *enc = XmlEncoding_Unknown;
894 if (buffer->written <= 3)
896 HRESULT hr = readerinput_growraw(readerinput);
897 if (FAILED(hr)) return hr;
898 if (buffer->written < 3) return MX_E_INPUTEND;
901 ptrW = (WCHAR *)buffer->data;
902 /* try start symbols if we have enough data to do that, input buffer should contain
903 first chunk already */
904 if (readerinput_is_utf8(readerinput))
905 *enc = XmlEncoding_UTF8;
906 else if (*ptrW == '<')
908 ptrW++;
909 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
910 *enc = XmlEncoding_UTF16;
912 /* try with BOM now */
913 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
915 buffer->cur += sizeof(utf8bom);
916 *enc = XmlEncoding_UTF8;
918 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
920 buffer->cur += sizeof(utf16lebom);
921 *enc = XmlEncoding_UTF16;
924 return S_OK;
927 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
929 encoded_buffer *buffer = &readerinput->buffer->encoded;
930 int len = buffer->written;
932 /* complete single byte char */
933 if (!(buffer->data[len-1] & 0x80)) return len;
935 /* find start byte of multibyte char */
936 while (--len && !(buffer->data[len] & 0xc0))
939 return len;
942 /* Returns byte length of complete char sequence for buffer code page,
943 it's relative to current buffer position which is currently used for BOM handling
944 only. */
945 static int readerinput_get_convlen(xmlreaderinput *readerinput)
947 encoded_buffer *buffer = &readerinput->buffer->encoded;
948 int len;
950 if (readerinput->buffer->code_page == CP_UTF8)
951 len = readerinput_get_utf8_convlen(readerinput);
952 else
953 len = buffer->written;
955 TRACE("%d\n", len - buffer->cur);
956 return len - buffer->cur;
959 /* It's possible that raw buffer has some leftovers from last conversion - some char
960 sequence that doesn't represent a full code point. Length argument should be calculated with
961 readerinput_get_convlen(), if it's -1 it will be calculated here. */
962 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
964 encoded_buffer *buffer = &readerinput->buffer->encoded;
966 if (len == -1)
967 len = readerinput_get_convlen(readerinput);
969 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
970 /* everything below cur is lost too */
971 buffer->written -= len + buffer->cur;
972 /* after this point we don't need cur offset really,
973 it's used only to mark where actual data begins when first chunk is read */
974 buffer->cur = 0;
977 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
979 BOOL prev_cr = buffer->prev_cr;
980 const WCHAR *src;
981 WCHAR *dest;
983 src = dest = (WCHAR*)buffer->data + off;
984 while ((const char*)src < buffer->data + buffer->written)
986 if (*src == '\r')
988 *dest++ = '\n';
989 src++;
990 prev_cr = TRUE;
991 continue;
993 if(prev_cr && *src == '\n')
994 src++;
995 else
996 *dest++ = *src++;
997 prev_cr = FALSE;
1000 buffer->written = (char*)dest - buffer->data;
1001 buffer->prev_cr = prev_cr;
1002 *dest = 0;
1005 /* note that raw buffer content is kept */
1006 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1008 encoded_buffer *src = &readerinput->buffer->encoded;
1009 encoded_buffer *dest = &readerinput->buffer->utf16;
1010 int len, dest_len;
1011 UINT cp = ~0u;
1012 HRESULT hr;
1013 WCHAR *ptr;
1015 hr = get_code_page(enc, &cp);
1016 if (FAILED(hr)) return;
1018 readerinput->buffer->code_page = cp;
1019 len = readerinput_get_convlen(readerinput);
1021 TRACE("switching to cp %d\n", cp);
1023 /* just copy in this case */
1024 if (enc == XmlEncoding_UTF16)
1026 readerinput_grow(readerinput, len);
1027 memcpy(dest->data, src->data + src->cur, len);
1028 dest->written += len*sizeof(WCHAR);
1030 else
1032 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1033 readerinput_grow(readerinput, dest_len);
1034 ptr = (WCHAR*)dest->data;
1035 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1036 ptr[dest_len] = 0;
1037 dest->written += dest_len*sizeof(WCHAR);
1040 fixup_buffer_cr(dest, 0);
1043 /* shrinks parsed data a buffer begins with */
1044 static void reader_shrink(xmlreader *reader)
1046 encoded_buffer *buffer = &reader->input->buffer->utf16;
1048 /* avoid to move too often using threshold shrink length */
1049 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1051 buffer->written -= buffer->cur*sizeof(WCHAR);
1052 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1053 buffer->cur = 0;
1054 *(WCHAR*)&buffer->data[buffer->written] = 0;
1058 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1059 It won't attempt to shrink but will grow destination buffer if needed */
1060 static HRESULT reader_more(xmlreader *reader)
1062 xmlreaderinput *readerinput = reader->input;
1063 encoded_buffer *src = &readerinput->buffer->encoded;
1064 encoded_buffer *dest = &readerinput->buffer->utf16;
1065 UINT cp = readerinput->buffer->code_page;
1066 int len, dest_len, prev_len;
1067 HRESULT hr;
1068 WCHAR *ptr;
1070 /* get some raw data from stream first */
1071 hr = readerinput_growraw(readerinput);
1072 len = readerinput_get_convlen(readerinput);
1073 prev_len = dest->written / sizeof(WCHAR);
1075 /* just copy for UTF-16 case */
1076 if (cp == 1200)
1078 readerinput_grow(readerinput, len);
1079 memcpy(dest->data + dest->written, src->data + src->cur, len);
1080 dest->written += len*sizeof(WCHAR);
1082 else
1084 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1085 readerinput_grow(readerinput, dest_len);
1086 ptr = (WCHAR*)(dest->data + dest->written);
1087 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1088 ptr[dest_len] = 0;
1089 dest->written += dest_len*sizeof(WCHAR);
1090 /* get rid of processed data */
1091 readerinput_shrinkraw(readerinput, len);
1094 fixup_buffer_cr(dest, prev_len);
1095 return hr;
1098 static inline UINT reader_get_cur(xmlreader *reader)
1100 return reader->input->buffer->utf16.cur;
1103 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1105 encoded_buffer *buffer = &reader->input->buffer->utf16;
1106 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1107 if (!*ptr) reader_more(reader);
1108 return (WCHAR*)buffer->data + buffer->cur;
1111 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1113 int i=0;
1114 const WCHAR *ptr = reader_get_ptr(reader);
1115 while (str[i])
1117 if (!ptr[i])
1119 reader_more(reader);
1120 ptr = reader_get_ptr(reader);
1122 if (str[i] != ptr[i])
1123 return ptr[i] - str[i];
1124 i++;
1126 return 0;
1129 static void reader_update_position(xmlreader *reader, WCHAR ch)
1131 if (ch == '\r')
1132 reader->position.line_position = 1;
1133 else if (ch == '\n')
1135 reader->position.line_number++;
1136 reader->position.line_position = 1;
1138 else
1139 reader->position.line_position++;
1142 /* moves cursor n WCHARs forward */
1143 static void reader_skipn(xmlreader *reader, int n)
1145 encoded_buffer *buffer = &reader->input->buffer->utf16;
1146 const WCHAR *ptr;
1148 while (*(ptr = reader_get_ptr(reader)) && n--)
1150 reader_update_position(reader, *ptr);
1151 buffer->cur++;
1155 static inline BOOL is_wchar_space(WCHAR ch)
1157 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1160 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1161 static int reader_skipspaces(xmlreader *reader)
1163 const WCHAR *ptr = reader_get_ptr(reader);
1164 UINT start = reader_get_cur(reader);
1166 while (is_wchar_space(*ptr))
1168 reader_skipn(reader, 1);
1169 ptr = reader_get_ptr(reader);
1172 return reader_get_cur(reader) - start;
1175 /* [26] VersionNum ::= '1.' [0-9]+ */
1176 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1178 WCHAR *ptr, *ptr2;
1179 UINT start;
1181 if (reader_cmp(reader, L"1.")) return WC_E_XMLDECL;
1183 start = reader_get_cur(reader);
1184 /* skip "1." */
1185 reader_skipn(reader, 2);
1187 ptr2 = ptr = reader_get_ptr(reader);
1188 while (*ptr >= '0' && *ptr <= '9')
1190 reader_skipn(reader, 1);
1191 ptr = reader_get_ptr(reader);
1194 if (ptr2 == ptr) return WC_E_DIGIT;
1195 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1196 TRACE("version=%s\n", debug_strval(reader, val));
1197 return S_OK;
1200 /* [25] Eq ::= S? '=' S? */
1201 static HRESULT reader_parse_eq(xmlreader *reader)
1203 reader_skipspaces(reader);
1204 if (reader_cmp(reader, L"=")) return WC_E_EQUAL;
1205 /* skip '=' */
1206 reader_skipn(reader, 1);
1207 reader_skipspaces(reader);
1208 return S_OK;
1211 static BOOL reader_is_quote(xmlreader *reader)
1213 return !reader_cmp(reader, L"\'") || !reader_cmp(reader, L"\"");
1216 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1217 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1219 struct reader_position position;
1220 strval val, name;
1221 HRESULT hr;
1223 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1225 position = reader->position;
1226 if (reader_cmp(reader, L"version")) return WC_E_XMLDECL;
1227 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1228 /* skip 'version' */
1229 reader_skipn(reader, 7);
1231 hr = reader_parse_eq(reader);
1232 if (FAILED(hr)) return hr;
1234 if (!reader_is_quote(reader))
1235 return WC_E_QUOTE;
1236 /* skip "'"|'"' */
1237 reader_skipn(reader, 1);
1239 hr = reader_parse_versionnum(reader, &val);
1240 if (FAILED(hr)) return hr;
1242 if (!reader_is_quote(reader))
1243 return WC_E_QUOTE;
1245 /* skip "'"|'"' */
1246 reader_skipn(reader, 1);
1248 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
/* ([A-Za-z0-9._] | '-')

   Tells whether ch may appear after the first character of an encoding name. */
static inline BOOL is_wchar_encname(WCHAR ch)
{
    switch (ch)
    {
    case '.':
    case '_':
    case '-':
        return TRUE;
    default:
        break;
    }

    if (ch >= 'A' && ch <= 'Z') return TRUE;
    if (ch >= 'a' && ch <= 'z') return TRUE;
    return ch >= '0' && ch <= '9';
}
1261 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1262 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1264 WCHAR *start = reader_get_ptr(reader), *ptr;
1265 xml_encoding enc;
1266 int len;
1268 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1269 return WC_E_ENCNAME;
1271 val->start = reader_get_cur(reader);
1273 ptr = start;
1274 while (is_wchar_encname(*++ptr))
1277 len = ptr - start;
1278 enc = parse_encoding_name(start, len);
1279 TRACE("encoding name %s\n", debugstr_wn(start, len));
1280 val->str = start;
1281 val->len = len;
1283 if (enc == XmlEncoding_Unknown)
1284 return WC_E_ENCNAME;
1286 /* skip encoding name */
1287 reader_skipn(reader, len);
1288 return S_OK;
1291 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1292 static HRESULT reader_parse_encdecl(xmlreader *reader)
1294 struct reader_position position;
1295 strval name, val;
1296 HRESULT hr;
1298 if (!reader_skipspaces(reader)) return S_FALSE;
1300 position = reader->position;
1301 if (reader_cmp(reader, L"encoding")) return S_FALSE;
1302 name.str = reader_get_ptr(reader);
1303 name.start = reader_get_cur(reader);
1304 name.len = 8;
1305 /* skip 'encoding' */
1306 reader_skipn(reader, 8);
1308 hr = reader_parse_eq(reader);
1309 if (FAILED(hr)) return hr;
1311 if (!reader_is_quote(reader))
1312 return WC_E_QUOTE;
1313 /* skip "'"|'"' */
1314 reader_skipn(reader, 1);
1316 hr = reader_parse_encname(reader, &val);
1317 if (FAILED(hr)) return hr;
1319 if (!reader_is_quote(reader))
1320 return WC_E_QUOTE;
1322 /* skip "'"|'"' */
1323 reader_skipn(reader, 1);
1325 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1328 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1329 static HRESULT reader_parse_sddecl(xmlreader *reader)
1331 struct reader_position position;
1332 strval name, val;
1333 UINT start;
1334 HRESULT hr;
1336 if (!reader_skipspaces(reader)) return S_FALSE;
1338 position = reader->position;
1339 if (reader_cmp(reader, L"standalone")) return S_FALSE;
1340 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1341 /* skip 'standalone' */
1342 reader_skipn(reader, 10);
1344 hr = reader_parse_eq(reader);
1345 if (FAILED(hr)) return hr;
1347 if (!reader_is_quote(reader))
1348 return WC_E_QUOTE;
1349 /* skip "'"|'"' */
1350 reader_skipn(reader, 1);
1352 if (reader_cmp(reader, L"yes") && reader_cmp(reader, L"no"))
1353 return WC_E_XMLDECL;
1355 start = reader_get_cur(reader);
1356 /* skip 'yes'|'no' */
1357 reader_skipn(reader, reader_cmp(reader, L"yes") ? 2 : 3);
1358 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1359 TRACE("standalone=%s\n", debug_strval(reader, &val));
1361 if (!reader_is_quote(reader))
1362 return WC_E_QUOTE;
1363 /* skip "'"|'"' */
1364 reader_skipn(reader, 1);
1366 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1369 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1370 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1372 struct reader_position position;
1373 HRESULT hr;
1375 if (reader_cmp(reader, L"<?xml "))
1376 return S_FALSE;
1378 reader_skipn(reader, 2);
1379 position = reader->position;
1380 reader_skipn(reader, 3);
1381 hr = reader_parse_versioninfo(reader);
1382 if (FAILED(hr))
1383 return hr;
1385 hr = reader_parse_encdecl(reader);
1386 if (FAILED(hr))
1387 return hr;
1389 hr = reader_parse_sddecl(reader);
1390 if (FAILED(hr))
1391 return hr;
1393 reader_skipspaces(reader);
1394 if (reader_cmp(reader, L"?>"))
1395 return WC_E_XMLDECL;
1397 /* skip '?>' */
1398 reader_skipn(reader, 2);
1400 reader->nodetype = XmlNodeType_XmlDeclaration;
1401 reader->empty_element.position = position;
1402 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1403 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1405 return S_OK;
1408 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1409 static HRESULT reader_parse_comment(xmlreader *reader)
1411 WCHAR *ptr;
1412 UINT start;
1414 if (reader->resumestate == XmlReadResumeState_Comment)
1416 start = reader->resume[XmlReadResume_Body];
1417 ptr = reader_get_ptr(reader);
1419 else
1421 /* skip '<!--' */
1422 reader_skipn(reader, 4);
1423 reader_shrink(reader);
1424 ptr = reader_get_ptr(reader);
1425 start = reader_get_cur(reader);
1426 reader->nodetype = XmlNodeType_Comment;
1427 reader->resume[XmlReadResume_Body] = start;
1428 reader->resumestate = XmlReadResumeState_Comment;
1429 reader_set_strvalue(reader, StringValue_Value, NULL);
1432 /* will exit when there's no more data, it won't attempt to
1433 read more from stream */
1434 while (*ptr)
1436 if (ptr[0] == '-')
1438 if (ptr[1] == '-')
1440 if (ptr[2] == '>')
1442 strval value;
1444 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1445 TRACE("%s\n", debug_strval(reader, &value));
1447 /* skip rest of markup '->' */
1448 reader_skipn(reader, 3);
1450 reader_set_strvalue(reader, StringValue_Value, &value);
1451 reader->resume[XmlReadResume_Body] = 0;
1452 reader->resumestate = XmlReadResumeState_Initial;
1453 return S_OK;
1455 else
1456 return WC_E_COMMENT;
1460 reader_skipn(reader, 1);
1461 ptr++;
1464 return S_OK;
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   Works on single WCHARs: surrogate halves (0xd800-0xdfff) are accepted as
   well, which makes everything from 0x20 up to 0xfffd valid. */
static inline BOOL is_char(WCHAR ch)
{
    if (ch == '\t' || ch == '\n' || ch == '\r')
        return TRUE;

    return ch >= 0x20 && ch <= 0xfffd;
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Tells whether ch is allowed in a public identifier literal. */
BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') || (ch == '\r') || (ch == '\n') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           /* The contiguous run 0x27-0x3b: '()*+,-./ 0-9 :;
              It has to start at the apostrophe - starting at '-' would
              leave out '()*+, which are all valid PubidChar. */
           (ch >= '\'' && ch <= ';') ||
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_');
}
/* Tells whether ch may start a Name ([4] NameStartChar). Works on single
   WCHARs, so both surrogate halves are accepted. */
BOOL is_namestartchar(WCHAR ch)
{
    static const struct { WCHAR first, last; } ranges[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            return TRUE;
    }

    return FALSE;
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tells whether ch may appear in an NCName, that is any name character
   except the colon. Works on single WCHARs, so both surrogate halves are
   accepted. */
BOOL is_ncnamechar(WCHAR ch)
{
    static const struct { WCHAR first, last; } ranges[] =
    {
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { '-',    '-'    },
        { '.',    '.'    },
        { '0',    '9'    },
        { 0xb7,   0xb7   },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x300,  0x36f  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            return TRUE;
    }

    return FALSE;
}
1535 BOOL is_namechar(WCHAR ch)
1537 return (ch == ':') || is_ncnamechar(ch);
1540 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1542 /* When we're on attribute always return attribute type, container node type is kept.
1543 Note that container is not necessarily an element, and attribute doesn't mean it's
1544 an attribute in XML spec terms. */
1545 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1548 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1549 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1550 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1551 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1552 [5] Name ::= NameStartChar (NameChar)* */
1553 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1555 WCHAR *ptr;
1556 UINT start;
1558 if (reader->resume[XmlReadResume_Name])
1560 start = reader->resume[XmlReadResume_Name];
1561 ptr = reader_get_ptr(reader);
1563 else
1565 ptr = reader_get_ptr(reader);
1566 start = reader_get_cur(reader);
1567 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1570 while (is_namechar(*ptr))
1572 reader_skipn(reader, 1);
1573 ptr = reader_get_ptr(reader);
1576 if (is_reader_pending(reader))
1578 reader->resume[XmlReadResume_Name] = start;
1579 return E_PENDING;
1581 else
1582 reader->resume[XmlReadResume_Name] = 0;
1584 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1585 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1587 return S_OK;
1590 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1591 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1593 static const strval xmlval = { (WCHAR *)L"xml", 3 };
1594 strval name;
1595 WCHAR *ptr;
1596 HRESULT hr;
1597 UINT i;
1599 hr = reader_parse_name(reader, &name);
1600 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1602 /* now that we got name check for illegal content */
1603 if (strval_eq(reader, &name, &xmlval))
1604 return WC_E_LEADINGXML;
1606 /* PITarget can't be a qualified name */
1607 ptr = reader_get_strptr(reader, &name);
1608 for (i = 0; i < name.len; i++)
1609 if (ptr[i] == ':')
1610 return i ? NC_E_NAMECOLON : WC_E_PI;
1612 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1613 *target = name;
1614 return S_OK;
1617 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1618 static HRESULT reader_parse_pi(xmlreader *reader)
1620 strval target;
1621 WCHAR *ptr;
1622 UINT start;
1623 HRESULT hr;
1625 switch (reader->resumestate)
1627 case XmlReadResumeState_Initial:
1628 /* skip '<?' */
1629 reader_skipn(reader, 2);
1630 reader_shrink(reader);
1631 reader->resumestate = XmlReadResumeState_PITarget;
1632 case XmlReadResumeState_PITarget:
1633 hr = reader_parse_pitarget(reader, &target);
1634 if (FAILED(hr)) return hr;
1635 reader_set_strvalue(reader, StringValue_LocalName, &target);
1636 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1637 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1638 reader->resumestate = XmlReadResumeState_PIBody;
1639 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1640 default:
1644 start = reader->resume[XmlReadResume_Body];
1645 ptr = reader_get_ptr(reader);
1646 while (*ptr)
1648 if (ptr[0] == '?')
1650 if (ptr[1] == '>')
1652 UINT cur = reader_get_cur(reader);
1653 strval value;
1655 /* strip all leading whitespace chars */
1656 while (start < cur)
1658 ptr = reader_get_ptr2(reader, start);
1659 if (!is_wchar_space(*ptr)) break;
1660 start++;
1663 reader_init_strvalue(start, cur-start, &value);
1665 /* skip '?>' */
1666 reader_skipn(reader, 2);
1667 TRACE("%s\n", debug_strval(reader, &value));
1668 reader->nodetype = XmlNodeType_ProcessingInstruction;
1669 reader->resumestate = XmlReadResumeState_Initial;
1670 reader->resume[XmlReadResume_Body] = 0;
1671 reader_set_strvalue(reader, StringValue_Value, &value);
1672 return S_OK;
1676 reader_skipn(reader, 1);
1677 ptr = reader_get_ptr(reader);
1680 return S_OK;
1683 /* This one is used to parse significant whitespace nodes, like in Misc production */
1684 static HRESULT reader_parse_whitespace(xmlreader *reader)
1686 switch (reader->resumestate)
1688 case XmlReadResumeState_Initial:
1689 reader_shrink(reader);
1690 reader->resumestate = XmlReadResumeState_Whitespace;
1691 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1692 reader->nodetype = XmlNodeType_Whitespace;
1693 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1694 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1695 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1696 /* fallthrough */
1697 case XmlReadResumeState_Whitespace:
1699 strval value;
1700 UINT start;
1702 reader_skipspaces(reader);
1703 if (is_reader_pending(reader)) return S_OK;
1705 start = reader->resume[XmlReadResume_Body];
1706 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1707 reader_set_strvalue(reader, StringValue_Value, &value);
1708 TRACE("%s\n", debug_strval(reader, &value));
1709 reader->resumestate = XmlReadResumeState_Initial;
1711 default:
1715 return S_OK;
1718 /* [27] Misc ::= Comment | PI | S */
1719 static HRESULT reader_parse_misc(xmlreader *reader)
1721 HRESULT hr = S_FALSE;
1723 if (reader->resumestate != XmlReadResumeState_Initial)
1725 hr = reader_more(reader);
1726 if (FAILED(hr)) return hr;
1728 /* finish current node */
1729 switch (reader->resumestate)
1731 case XmlReadResumeState_PITarget:
1732 case XmlReadResumeState_PIBody:
1733 return reader_parse_pi(reader);
1734 case XmlReadResumeState_Comment:
1735 return reader_parse_comment(reader);
1736 case XmlReadResumeState_Whitespace:
1737 return reader_parse_whitespace(reader);
1738 default:
1739 ERR("unknown resume state %d\n", reader->resumestate);
1743 while (1)
1745 const WCHAR *cur = reader_get_ptr(reader);
1747 if (is_wchar_space(*cur))
1748 hr = reader_parse_whitespace(reader);
1749 else if (!reader_cmp(reader, L"<!--"))
1750 hr = reader_parse_comment(reader);
1751 else if (!reader_cmp(reader, L"<?"))
1752 hr = reader_parse_pi(reader);
1753 else
1754 break;
1756 if (hr != S_FALSE) return hr;
1759 return hr;
1762 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1763 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1765 WCHAR *cur = reader_get_ptr(reader), quote;
1766 UINT start;
1768 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1770 quote = *cur;
1771 reader_skipn(reader, 1);
1773 cur = reader_get_ptr(reader);
1774 start = reader_get_cur(reader);
1775 while (is_char(*cur) && *cur != quote)
1777 reader_skipn(reader, 1);
1778 cur = reader_get_ptr(reader);
1780 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1781 if (*cur == quote) reader_skipn(reader, 1);
1783 TRACE("%s\n", debug_strval(reader, literal));
1784 return S_OK;
1787 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1788 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1789 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1791 WCHAR *cur = reader_get_ptr(reader), quote;
1792 UINT start;
1794 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1796 quote = *cur;
1797 reader_skipn(reader, 1);
1799 start = reader_get_cur(reader);
1800 cur = reader_get_ptr(reader);
1801 while (is_pubchar(*cur) && *cur != quote)
1803 reader_skipn(reader, 1);
1804 cur = reader_get_ptr(reader);
1806 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1807 if (*cur == quote) reader_skipn(reader, 1);
1809 TRACE("%s\n", debug_strval(reader, literal));
1810 return S_OK;
1813 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1814 static HRESULT reader_parse_externalid(xmlreader *reader)
1816 static WCHAR systemW[] = L"SYSTEM";
1817 static WCHAR publicW[] = L"PUBLIC";
1818 struct reader_position position = reader->position;
1819 strval name, sys;
1820 HRESULT hr;
1821 int cnt;
1823 if (!reader_cmp(reader, publicW)) {
1824 strval pub;
1826 /* public id */
1827 reader_skipn(reader, 6);
1828 cnt = reader_skipspaces(reader);
1829 if (!cnt) return WC_E_WHITESPACE;
1831 hr = reader_parse_pub_literal(reader, &pub);
1832 if (FAILED(hr)) return hr;
1834 reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
1835 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1836 if (FAILED(hr)) return hr;
1838 cnt = reader_skipspaces(reader);
1839 if (!cnt) return S_OK;
1841 /* optional system id */
1842 hr = reader_parse_sys_literal(reader, &sys);
1843 if (FAILED(hr)) return S_OK;
1845 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1846 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1847 if (FAILED(hr)) return hr;
1849 return S_OK;
1850 } else if (!reader_cmp(reader, systemW)) {
1851 /* system id */
1852 reader_skipn(reader, 6);
1853 cnt = reader_skipspaces(reader);
1854 if (!cnt) return WC_E_WHITESPACE;
1856 hr = reader_parse_sys_literal(reader, &sys);
1857 if (FAILED(hr)) return hr;
1859 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1860 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1863 return S_FALSE;
1866 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1867 static HRESULT reader_parse_dtd(xmlreader *reader)
1869 strval name;
1870 WCHAR *cur;
1871 HRESULT hr;
1873 if (reader_cmp(reader, L"<!DOCTYPE")) return S_FALSE;
1874 reader_shrink(reader);
1876 /* DTD processing is not allowed by default */
1877 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1879 reader_skipn(reader, 9);
1880 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1882 /* name */
1883 hr = reader_parse_name(reader, &name);
1884 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1886 reader_skipspaces(reader);
1888 hr = reader_parse_externalid(reader);
1889 if (FAILED(hr)) return hr;
1891 reader_skipspaces(reader);
1893 cur = reader_get_ptr(reader);
1894 if (*cur != '>')
1896 FIXME("internal subset parsing not implemented\n");
1897 return E_NOTIMPL;
1900 /* skip '>' */
1901 reader_skipn(reader, 1);
1903 reader->nodetype = XmlNodeType_DocumentType;
1904 reader_set_strvalue(reader, StringValue_LocalName, &name);
1905 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1907 return S_OK;
1910 /* [11 NS] LocalPart ::= NCName */
1911 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1913 WCHAR *ptr;
1914 UINT start;
1916 if (reader->resume[XmlReadResume_Local])
1918 start = reader->resume[XmlReadResume_Local];
1919 ptr = reader_get_ptr(reader);
1921 else
1923 ptr = reader_get_ptr(reader);
1924 start = reader_get_cur(reader);
1927 while (is_ncnamechar(*ptr))
1929 reader_skipn(reader, 1);
1930 ptr = reader_get_ptr(reader);
1933 if (check_for_separator && *ptr == ':')
1934 return NC_E_QNAMECOLON;
1936 if (is_reader_pending(reader))
1938 reader->resume[XmlReadResume_Local] = start;
1939 return E_PENDING;
1941 else
1942 reader->resume[XmlReadResume_Local] = 0;
1944 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1946 return S_OK;
1949 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1950 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1951 [9 NS] UnprefixedName ::= LocalPart
1952 [10 NS] Prefix ::= NCName */
1953 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1955 WCHAR *ptr;
1956 UINT start;
1957 HRESULT hr;
1959 if (reader->resume[XmlReadResume_Name])
1961 start = reader->resume[XmlReadResume_Name];
1962 ptr = reader_get_ptr(reader);
1964 else
1966 ptr = reader_get_ptr(reader);
1967 start = reader_get_cur(reader);
1968 reader->resume[XmlReadResume_Name] = start;
1969 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1972 if (reader->resume[XmlReadResume_Local])
1974 hr = reader_parse_local(reader, local, FALSE);
1975 if (FAILED(hr)) return hr;
1977 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1978 local->start - reader->resume[XmlReadResume_Name] - 1,
1979 prefix);
1981 else
1983 /* skip prefix part */
1984 while (is_ncnamechar(*ptr))
1986 reader_skipn(reader, 1);
1987 ptr = reader_get_ptr(reader);
1990 if (is_reader_pending(reader)) return E_PENDING;
1992 /* got a qualified name */
1993 if (*ptr == ':')
1995 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1997 /* skip ':' */
1998 reader_skipn(reader, 1);
1999 hr = reader_parse_local(reader, local, TRUE);
2000 if (FAILED(hr)) return hr;
2002 else
2004 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2005 reader_init_strvalue(0, 0, prefix);
2009 if (prefix->len)
2010 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2011 else
2012 TRACE("ncname %s\n", debug_strval(reader, local));
2014 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2015 /* count ':' too */
2016 (prefix->len ? prefix->len + 1 : 0) + local->len,
2017 qname);
2019 reader->resume[XmlReadResume_Name] = 0;
2020 reader->resume[XmlReadResume_Local] = 0;
2022 return S_OK;
2025 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2027 static const strval lt = { (WCHAR *)L"lt", 2 };
2028 static const strval gt = { (WCHAR *)L"gt", 2 };
2029 static const strval amp = { (WCHAR *)L"amp", 3 };
2030 static const strval apos = { (WCHAR *)L"apos", 4 };
2031 static const strval quot = { (WCHAR *)L"quot", 4 };
2032 WCHAR *str = reader_get_strptr(reader, name);
2034 switch (*str)
2036 case 'l':
2037 if (strval_eq(reader, name, &lt)) return '<';
2038 break;
2039 case 'g':
2040 if (strval_eq(reader, name, &gt)) return '>';
2041 break;
2042 case 'a':
2043 if (strval_eq(reader, name, &amp))
2044 return '&';
2045 else if (strval_eq(reader, name, &apos))
2046 return '\'';
2047 break;
2048 case 'q':
2049 if (strval_eq(reader, name, &quot)) return '\"';
2050 break;
2051 default:
2055 return 0;
2058 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2059 [67] Reference ::= EntityRef | CharRef
2060 [68] EntityRef ::= '&' Name ';' */
2061 static HRESULT reader_parse_reference(xmlreader *reader)
2063 encoded_buffer *buffer = &reader->input->buffer->utf16;
2064 WCHAR *start = reader_get_ptr(reader), *ptr;
2065 UINT cur = reader_get_cur(reader);
2066 WCHAR ch = 0;
2067 int len;
2069 /* skip '&' */
2070 reader_skipn(reader, 1);
2071 ptr = reader_get_ptr(reader);
2073 if (*ptr == '#')
2075 reader_skipn(reader, 1);
2076 ptr = reader_get_ptr(reader);
2078 /* hex char or decimal */
2079 if (*ptr == 'x')
2081 reader_skipn(reader, 1);
2082 ptr = reader_get_ptr(reader);
2084 while (*ptr != ';')
2086 if ((*ptr >= '0' && *ptr <= '9'))
2087 ch = ch*16 + *ptr - '0';
2088 else if ((*ptr >= 'a' && *ptr <= 'f'))
2089 ch = ch*16 + *ptr - 'a' + 10;
2090 else if ((*ptr >= 'A' && *ptr <= 'F'))
2091 ch = ch*16 + *ptr - 'A' + 10;
2092 else
2093 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2094 reader_skipn(reader, 1);
2095 ptr = reader_get_ptr(reader);
2098 else
2100 while (*ptr != ';')
2102 if ((*ptr >= '0' && *ptr <= '9'))
2104 ch = ch*10 + *ptr - '0';
2105 reader_skipn(reader, 1);
2106 ptr = reader_get_ptr(reader);
2108 else
2109 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2113 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2115 /* normalize */
2116 if (is_wchar_space(ch)) ch = ' ';
2118 ptr = reader_get_ptr(reader);
2119 start = reader_get_ptr2(reader, cur);
2120 len = buffer->written - ((char *)ptr - buffer->data);
2121 memmove(start + 1, ptr + 1, len);
2123 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2124 buffer->cur = cur + 1;
2126 *start = ch;
2128 else
2130 strval name;
2131 HRESULT hr;
2133 hr = reader_parse_name(reader, &name);
2134 if (FAILED(hr)) return hr;
2136 ptr = reader_get_ptr(reader);
2137 if (*ptr != ';') return WC_E_SEMICOLON;
2139 /* predefined entities resolve to a single character */
2140 ch = get_predefined_entity(reader, &name);
2141 if (ch)
2143 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2144 memmove(start+1, ptr+1, len);
2145 buffer->cur = cur + 1;
2146 buffer->written -= (ptr - start) * sizeof(WCHAR);
2148 *start = ch;
2150 else
2152 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2153 return WC_E_UNDECLAREDENTITY;
2158 return S_OK;
2161 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2162 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2164 WCHAR *ptr, quote;
2165 UINT start;
2167 ptr = reader_get_ptr(reader);
2169 /* skip opening quote */
2170 quote = *ptr;
2171 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2172 reader_skipn(reader, 1);
2174 ptr = reader_get_ptr(reader);
2175 start = reader_get_cur(reader);
2176 while (*ptr)
2178 if (*ptr == '<') return WC_E_LESSTHAN;
2180 if (*ptr == quote)
2182 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2183 /* skip closing quote */
2184 reader_skipn(reader, 1);
2185 return S_OK;
2188 if (*ptr == '&')
2190 HRESULT hr = reader_parse_reference(reader);
2191 if (FAILED(hr)) return hr;
2193 else
2195 /* replace all whitespace chars with ' ' */
2196 if (is_wchar_space(*ptr)) *ptr = ' ';
2197 reader_skipn(reader, 1);
2199 ptr = reader_get_ptr(reader);
2202 return WC_E_QUOTE;
2205 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2206 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2207 [3 NS] DefaultAttName ::= 'xmlns'
2208 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2209 static HRESULT reader_parse_attribute(xmlreader *reader)
2211 struct reader_position position = reader->position;
2212 strval prefix, local, qname, value;
2213 enum attribute_flags flags = 0;
2214 HRESULT hr;
2216 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2217 if (FAILED(hr)) return hr;
2219 if (strval_eq(reader, &prefix, &strval_xmlns))
2220 flags |= ATTRIBUTE_NS_DEFINITION;
2222 if (strval_eq(reader, &qname, &strval_xmlns))
2223 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2225 hr = reader_parse_eq(reader);
2226 if (FAILED(hr)) return hr;
2228 hr = reader_parse_attvalue(reader, &value);
2229 if (FAILED(hr)) return hr;
2231 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2232 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2234 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2235 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2238 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2239 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2240 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2242 struct reader_position position = reader->position;
2243 HRESULT hr;
2245 hr = reader_parse_qname(reader, prefix, local, qname);
2246 if (FAILED(hr)) return hr;
2248 for (;;)
2250 reader_skipspaces(reader);
2252 /* empty element */
2253 if ((reader->is_empty_element = !reader_cmp(reader, L"/>")))
2255 struct element *element = &reader->empty_element;
2257 /* skip '/>' */
2258 reader_skipn(reader, 2);
2260 reader_free_strvalued(reader, &element->qname);
2261 reader_free_strvalued(reader, &element->localname);
2263 element->prefix = *prefix;
2264 reader_strvaldup(reader, qname, &element->qname);
2265 reader_strvaldup(reader, local, &element->localname);
2266 element->position = position;
2267 reader_mark_ns_nodes(reader, element);
2268 return S_OK;
2271 /* got a start tag */
2272 if (!reader_cmp(reader, L">"))
2274 /* skip '>' */
2275 reader_skipn(reader, 1);
2276 return reader_push_element(reader, prefix, local, qname, &position);
2279 hr = reader_parse_attribute(reader);
2280 if (FAILED(hr)) return hr;
2283 return S_OK;
2286 /* [39] element ::= EmptyElemTag | STag content ETag */
2287 static HRESULT reader_parse_element(xmlreader *reader)
2289 HRESULT hr;
2291 switch (reader->resumestate)
2293 case XmlReadResumeState_Initial:
2294 /* check if we are really on element */
2295 if (reader_cmp(reader, L"<")) return S_FALSE;
2297 /* skip '<' */
2298 reader_skipn(reader, 1);
2300 reader_shrink(reader);
2301 reader->resumestate = XmlReadResumeState_STag;
2302 case XmlReadResumeState_STag:
2304 strval qname, prefix, local;
2306 /* this handles empty elements too */
2307 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2308 if (FAILED(hr)) return hr;
2310 /* FIXME: need to check for defined namespace to reject invalid prefix */
2312 /* if we got empty element and stack is empty go straight to Misc */
2313 if (reader->is_empty_element && list_empty(&reader->elements))
2314 reader->instate = XmlReadInState_MiscEnd;
2315 else
2316 reader->instate = XmlReadInState_Content;
2318 reader->nodetype = XmlNodeType_Element;
2319 reader->resumestate = XmlReadResumeState_Initial;
2320 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2321 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2322 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2323 break;
2325 default:
2326 hr = E_FAIL;
2329 return hr;
2332 /* [13 NS] ETag ::= '</' QName S? '>' */
2333 static HRESULT reader_parse_endtag(xmlreader *reader)
2335 struct reader_position position;
2336 strval prefix, local, qname;
2337 struct element *element;
2338 HRESULT hr;
2340 /* skip '</' */
2341 reader_skipn(reader, 2);
2343 position = reader->position;
2344 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2345 if (FAILED(hr)) return hr;
2347 reader_skipspaces(reader);
2349 if (reader_cmp(reader, L">")) return WC_E_GREATERTHAN;
2351 /* skip '>' */
2352 reader_skipn(reader, 1);
2354 /* Element stack should never be empty at this point, cause we shouldn't get to
2355 content parsing if it's empty. */
2356 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2357 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2359 /* update position stored for start tag, we won't be using it */
2360 element->position = position;
2362 reader->nodetype = XmlNodeType_EndElement;
2363 reader->is_empty_element = FALSE;
2364 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2366 return S_OK;
2369 /* [18] CDSect ::= CDStart CData CDEnd
2370 [19] CDStart ::= '<![CDATA['
2371 [20] CData ::= (Char* - (Char* ']]>' Char*))
2372 [21] CDEnd ::= ']]>' */
2373 static HRESULT reader_parse_cdata(xmlreader *reader)
2375 WCHAR *ptr;
2376 UINT start;
2378 if (reader->resumestate == XmlReadResumeState_CDATA)
2380 start = reader->resume[XmlReadResume_Body];
2381 ptr = reader_get_ptr(reader);
2383 else
2385 /* skip markup '<![CDATA[' */
2386 reader_skipn(reader, 9);
2387 reader_shrink(reader);
2388 ptr = reader_get_ptr(reader);
2389 start = reader_get_cur(reader);
2390 reader->nodetype = XmlNodeType_CDATA;
2391 reader->resume[XmlReadResume_Body] = start;
2392 reader->resumestate = XmlReadResumeState_CDATA;
2393 reader_set_strvalue(reader, StringValue_Value, NULL);
2396 while (*ptr)
2398 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2400 strval value;
2402 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2404 /* skip ']]>' */
2405 reader_skipn(reader, 3);
2406 TRACE("%s\n", debug_strval(reader, &value));
2408 reader_set_strvalue(reader, StringValue_Value, &value);
2409 reader->resume[XmlReadResume_Body] = 0;
2410 reader->resumestate = XmlReadResumeState_Initial;
2411 return S_OK;
2413 else
2415 reader_skipn(reader, 1);
2416 ptr = reader_get_ptr(reader);
2420 return S_OK;
2423 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2424 static HRESULT reader_parse_chardata(xmlreader *reader)
2426 struct reader_position position;
2427 WCHAR *ptr;
2428 UINT start;
2430 if (reader->resumestate == XmlReadResumeState_CharData)
2432 start = reader->resume[XmlReadResume_Body];
2433 ptr = reader_get_ptr(reader);
2435 else
2437 reader_shrink(reader);
2438 ptr = reader_get_ptr(reader);
2439 start = reader_get_cur(reader);
2440 /* There's no text */
2441 if (!*ptr || *ptr == '<') return S_OK;
2442 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2443 reader->resume[XmlReadResume_Body] = start;
2444 reader->resumestate = XmlReadResumeState_CharData;
2445 reader_set_strvalue(reader, StringValue_Value, NULL);
2448 position = reader->position;
2449 while (*ptr)
2451 /* CDATA closing sequence ']]>' is not allowed */
2452 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2453 return WC_E_CDSECTEND;
2455 /* Found next markup part */
2456 if (ptr[0] == '<')
2458 strval value;
2460 reader->empty_element.position = position;
2461 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2462 reader_set_strvalue(reader, StringValue_Value, &value);
2463 reader->resume[XmlReadResume_Body] = 0;
2464 reader->resumestate = XmlReadResumeState_Initial;
2465 return S_OK;
2468 /* this covers a case when text has leading whitespace chars */
2469 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2471 if (!reader_cmp(reader, L"&"))
2472 reader_parse_reference(reader);
2473 else
2474 reader_skipn(reader, 1);
2476 ptr = reader_get_ptr(reader);
2479 return S_OK;
2482 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2483 static HRESULT reader_parse_content(xmlreader *reader)
2485 if (reader->resumestate != XmlReadResumeState_Initial)
2487 switch (reader->resumestate)
2489 case XmlReadResumeState_CDATA:
2490 return reader_parse_cdata(reader);
2491 case XmlReadResumeState_Comment:
2492 return reader_parse_comment(reader);
2493 case XmlReadResumeState_PIBody:
2494 case XmlReadResumeState_PITarget:
2495 return reader_parse_pi(reader);
2496 case XmlReadResumeState_CharData:
2497 return reader_parse_chardata(reader);
2498 default:
2499 ERR("unknown resume state %d\n", reader->resumestate);
2503 reader_shrink(reader);
2505 /* handle end tag here, it indicates end of content as well */
2506 if (!reader_cmp(reader, L"</"))
2507 return reader_parse_endtag(reader);
2509 if (!reader_cmp(reader, L"<!--"))
2510 return reader_parse_comment(reader);
2512 if (!reader_cmp(reader, L"<?"))
2513 return reader_parse_pi(reader);
2515 if (!reader_cmp(reader, L"<![CDATA["))
2516 return reader_parse_cdata(reader);
2518 if (!reader_cmp(reader, L"<"))
2519 return reader_parse_element(reader);
2521 /* what's left must be CharData */
2522 return reader_parse_chardata(reader);
2525 static HRESULT reader_parse_nextnode(xmlreader *reader)
2527 XmlNodeType nodetype = reader_get_nodetype(reader);
2528 HRESULT hr;
2530 if (!is_reader_pending(reader))
2532 reader->chunk_read_off = 0;
2533 reader_clear_attrs(reader);
2536 /* When moving from EndElement or empty element, pop its own namespace definitions */
2537 switch (nodetype)
2539 case XmlNodeType_Attribute:
2540 reader_dec_depth(reader);
2541 /* fallthrough */
2542 case XmlNodeType_Element:
2543 if (reader->is_empty_element)
2544 reader_pop_ns_nodes(reader, &reader->empty_element);
2545 else if (FAILED(hr = reader_inc_depth(reader)))
2546 return hr;
2547 break;
2548 case XmlNodeType_EndElement:
2549 reader_pop_element(reader);
2550 reader_dec_depth(reader);
2551 break;
2552 default:
2556 for (;;)
2558 switch (reader->instate)
2560 /* if it's a first call for a new input we need to detect stream encoding */
2561 case XmlReadInState_Initial:
2563 xml_encoding enc;
2565 hr = readerinput_growraw(reader->input);
2566 if (FAILED(hr)) return hr;
2568 reader->position.line_number = 1;
2569 reader->position.line_position = 1;
2571 /* try to detect encoding by BOM or data and set input code page */
2572 hr = readerinput_detectencoding(reader->input, &enc);
2573 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2574 debugstr_w(xml_encoding_map[enc].name), hr);
2575 if (FAILED(hr)) return hr;
2577 /* always switch first time cause we have to put something in */
2578 readerinput_switchencoding(reader->input, enc);
2580 /* parse xml declaration */
2581 hr = reader_parse_xmldecl(reader);
2582 if (FAILED(hr)) return hr;
2584 readerinput_shrinkraw(reader->input, -1);
2585 reader->instate = XmlReadInState_Misc_DTD;
2586 if (hr == S_OK) return hr;
2588 break;
2589 case XmlReadInState_Misc_DTD:
2590 hr = reader_parse_misc(reader);
2591 if (FAILED(hr)) return hr;
2593 if (hr == S_FALSE)
2594 reader->instate = XmlReadInState_DTD;
2595 else
2596 return hr;
2597 break;
2598 case XmlReadInState_DTD:
2599 hr = reader_parse_dtd(reader);
2600 if (FAILED(hr)) return hr;
2602 if (hr == S_OK)
2604 reader->instate = XmlReadInState_DTD_Misc;
2605 return hr;
2607 else
2608 reader->instate = XmlReadInState_Element;
2609 break;
2610 case XmlReadInState_DTD_Misc:
2611 hr = reader_parse_misc(reader);
2612 if (FAILED(hr)) return hr;
2614 if (hr == S_FALSE)
2615 reader->instate = XmlReadInState_Element;
2616 else
2617 return hr;
2618 break;
2619 case XmlReadInState_Element:
2620 return reader_parse_element(reader);
2621 case XmlReadInState_Content:
2622 return reader_parse_content(reader);
2623 case XmlReadInState_MiscEnd:
2624 hr = reader_parse_misc(reader);
2625 if (hr != S_FALSE) return hr;
2627 if (*reader_get_ptr(reader))
2629 WARN("found garbage in the end of XML\n");
2630 return WC_E_SYNTAX;
2633 reader->instate = XmlReadInState_Eof;
2634 reader->state = XmlReadState_EndOfFile;
2635 reader->nodetype = XmlNodeType_None;
2636 return hr;
2637 case XmlReadInState_Eof:
2638 return S_FALSE;
2639 default:
2640 FIXME("internal state %d not handled\n", reader->instate);
2641 return E_NOTIMPL;
2645 return E_NOTIMPL;
2648 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2650 xmlreader *This = impl_from_IXmlReader(iface);
2652 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2654 if (IsEqualGUID(riid, &IID_IUnknown) ||
2655 IsEqualGUID(riid, &IID_IXmlReader))
2657 *ppvObject = iface;
2659 else
2661 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2662 *ppvObject = NULL;
2663 return E_NOINTERFACE;
2666 IXmlReader_AddRef(iface);
2668 return S_OK;
2671 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2673 xmlreader *This = impl_from_IXmlReader(iface);
2674 ULONG ref = InterlockedIncrement(&This->ref);
2675 TRACE("(%p)->(%d)\n", This, ref);
2676 return ref;
2679 static void reader_clear_ns(xmlreader *reader)
2681 struct ns *ns, *ns2;
2683 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2684 list_remove(&ns->entry);
2685 reader_free_strvalued(reader, &ns->prefix);
2686 reader_free_strvalued(reader, &ns->uri);
2687 reader_free(reader, ns);
2690 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2691 list_remove(&ns->entry);
2692 reader_free_strvalued(reader, &ns->uri);
2693 reader_free(reader, ns);
2697 static void reader_reset_parser(xmlreader *reader)
2699 reader->position.line_number = 0;
2700 reader->position.line_position = 0;
2702 reader_clear_elements(reader);
2703 reader_clear_attrs(reader);
2704 reader_clear_ns(reader);
2705 reader_free_strvalues(reader);
2707 reader->depth = 0;
2708 reader->nodetype = XmlNodeType_None;
2709 reader->resumestate = XmlReadResumeState_Initial;
2710 memset(reader->resume, 0, sizeof(reader->resume));
2711 reader->is_empty_element = FALSE;
2714 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2716 xmlreader *This = impl_from_IXmlReader(iface);
2717 LONG ref = InterlockedDecrement(&This->ref);
2719 TRACE("(%p)->(%d)\n", This, ref);
2721 if (ref == 0)
2723 IMalloc *imalloc = This->imalloc;
2724 reader_reset_parser(This);
2725 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2726 if (This->resolver) IXmlResolver_Release(This->resolver);
2727 if (This->mlang) IUnknown_Release(This->mlang);
2728 reader_free(This, This);
2729 if (imalloc) IMalloc_Release(imalloc);
2732 return ref;
2735 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2737 xmlreader *This = impl_from_IXmlReader(iface);
2738 IXmlReaderInput *readerinput;
2739 HRESULT hr;
2741 TRACE("(%p)->(%p)\n", This, input);
2743 if (This->input)
2745 readerinput_release_stream(This->input);
2746 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2747 This->input = NULL;
2750 reader_reset_parser(This);
2752 /* just reset current input */
2753 if (!input)
2755 This->state = XmlReadState_Initial;
2756 return S_OK;
2759 /* now try IXmlReaderInput, ISequentialStream, IStream */
2760 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2761 if (hr == S_OK)
2763 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2764 This->input = impl_from_IXmlReaderInput(readerinput);
2765 else
2767 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2768 readerinput, readerinput->lpVtbl);
2769 IUnknown_Release(readerinput);
2770 return E_FAIL;
2775 if (hr != S_OK || !readerinput)
2777 /* create IXmlReaderInput basing on supplied interface */
2778 hr = CreateXmlReaderInputWithEncodingName(input,
2779 This->imalloc, NULL, FALSE, NULL, &readerinput);
2780 if (hr != S_OK) return hr;
2781 This->input = impl_from_IXmlReaderInput(readerinput);
2784 /* set stream for supplied IXmlReaderInput */
2785 hr = readerinput_query_for_stream(This->input);
2786 if (hr == S_OK)
2788 This->state = XmlReadState_Initial;
2789 This->instate = XmlReadInState_Initial;
2791 return hr;
2794 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2796 xmlreader *This = impl_from_IXmlReader(iface);
2798 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2800 if (!value) return E_INVALIDARG;
2802 switch (property)
2804 case XmlReaderProperty_MultiLanguage:
2805 *value = (LONG_PTR)This->mlang;
2806 if (This->mlang)
2807 IUnknown_AddRef(This->mlang);
2808 break;
2809 case XmlReaderProperty_XmlResolver:
2810 *value = (LONG_PTR)This->resolver;
2811 if (This->resolver)
2812 IXmlResolver_AddRef(This->resolver);
2813 break;
2814 case XmlReaderProperty_DtdProcessing:
2815 *value = This->dtdmode;
2816 break;
2817 case XmlReaderProperty_ReadState:
2818 *value = This->state;
2819 break;
2820 case XmlReaderProperty_MaxElementDepth:
2821 *value = This->max_depth;
2822 break;
2823 default:
2824 FIXME("Unimplemented property (%u)\n", property);
2825 return E_NOTIMPL;
2828 return S_OK;
2831 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2833 xmlreader *This = impl_from_IXmlReader(iface);
2835 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2837 switch (property)
2839 case XmlReaderProperty_MultiLanguage:
2840 if (This->mlang)
2841 IUnknown_Release(This->mlang);
2842 This->mlang = (IUnknown*)value;
2843 if (This->mlang)
2844 IUnknown_AddRef(This->mlang);
2845 if (This->mlang)
2846 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2847 break;
2848 case XmlReaderProperty_XmlResolver:
2849 if (This->resolver)
2850 IXmlResolver_Release(This->resolver);
2851 This->resolver = (IXmlResolver*)value;
2852 if (This->resolver)
2853 IXmlResolver_AddRef(This->resolver);
2854 break;
2855 case XmlReaderProperty_DtdProcessing:
2856 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2857 This->dtdmode = value;
2858 break;
2859 case XmlReaderProperty_MaxElementDepth:
2860 This->max_depth = value;
2861 break;
2862 default:
2863 FIXME("Unimplemented property (%u)\n", property);
2864 return E_NOTIMPL;
2867 return S_OK;
2870 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2872 xmlreader *This = impl_from_IXmlReader(iface);
2873 XmlNodeType oldtype = This->nodetype;
2874 XmlNodeType type;
2875 HRESULT hr;
2877 TRACE("(%p)->(%p)\n", This, nodetype);
2879 if (!nodetype)
2880 nodetype = &type;
2882 switch (This->state)
2884 case XmlReadState_Closed:
2885 hr = S_FALSE;
2886 break;
2887 case XmlReadState_Error:
2888 hr = This->error;
2889 break;
2890 default:
2891 hr = reader_parse_nextnode(This);
2892 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2893 This->state = XmlReadState_Interactive;
2895 if (FAILED(hr))
2897 This->state = XmlReadState_Error;
2898 This->nodetype = XmlNodeType_None;
2899 This->depth = 0;
2900 This->error = hr;
2904 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2905 *nodetype = This->nodetype;
2907 return hr;
2910 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2912 xmlreader *This = impl_from_IXmlReader(iface);
2914 TRACE("(%p)->(%p)\n", This, node_type);
2916 if (!node_type)
2917 return E_INVALIDARG;
2919 *node_type = reader_get_nodetype(This);
2920 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2923 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2925 reader->attr = attr;
2926 reader->chunk_read_off = 0;
2927 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2928 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2929 reader_set_strvalue(reader, StringValue_Value, &attr->value);
2932 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2934 if (!reader->attr_count)
2935 return S_FALSE;
2937 if (!reader->attr)
2938 reader_inc_depth(reader);
2940 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2942 return S_OK;
2945 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2947 xmlreader *This = impl_from_IXmlReader(iface);
2949 TRACE("(%p)\n", This);
2951 return reader_move_to_first_attribute(This);
2954 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2956 xmlreader *This = impl_from_IXmlReader(iface);
2957 const struct list *next;
2959 TRACE("(%p)\n", This);
2961 if (!This->attr_count) return S_FALSE;
2963 if (!This->attr)
2964 return reader_move_to_first_attribute(This);
2966 next = list_next(&This->attrs, &This->attr->entry);
2967 if (next)
2968 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
2970 return next ? S_OK : S_FALSE;
2973 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
2975 static const WCHAR xmlns_uriW[] = L"http://www.w3.org/2000/xmlns/";
2976 static const WCHAR xml_uriW[] = L"http://www.w3.org/XML/1998/namespace";
2978 /* Check for reserved prefixes first */
2979 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
2980 strval_eq(reader, &attr->prefix, &strval_xmlns))
2982 *uri = xmlns_uriW;
2983 *len = ARRAY_SIZE(xmlns_uriW) - 1;
2985 else if (strval_eq(reader, &attr->prefix, &strval_xml))
2987 *uri = xml_uriW;
2988 *len = ARRAY_SIZE(xml_uriW) - 1;
2990 else
2992 *uri = NULL;
2993 *len = 0;
2996 if (!*uri)
2998 struct ns *ns;
3000 if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3002 *uri = ns->uri.str;
3003 *len = ns->uri.len;
3005 else
3007 *uri = emptyW;
3008 *len = 0;
3013 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3015 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3017 *name = xmlnsW;
3018 *len = 5;
3020 else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3022 const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3023 *name = ns->prefix.str;
3024 *len = ns->prefix.len;
3026 else
3028 *name = attr->localname.str;
3029 *len = attr->localname.len;
3033 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3034 const WCHAR *local_name, const WCHAR *namespace_uri)
3036 xmlreader *This = impl_from_IXmlReader(iface);
3037 UINT target_name_len, target_uri_len;
3038 struct attribute *attr;
3040 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3042 if (!local_name)
3043 return E_INVALIDARG;
3045 if (!This->attr_count)
3046 return S_FALSE;
3048 if (!namespace_uri)
3049 namespace_uri = emptyW;
3051 target_name_len = lstrlenW(local_name);
3052 target_uri_len = lstrlenW(namespace_uri);
3054 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3056 UINT name_len, uri_len;
3057 const WCHAR *name, *uri;
3059 reader_get_attribute_local_name(This, attr, &name, &name_len);
3060 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3062 if (name_len == target_name_len && uri_len == target_uri_len &&
3063 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3065 reader_set_current_attribute(This, attr);
3066 return S_OK;
3070 return S_FALSE;
3073 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3075 xmlreader *This = impl_from_IXmlReader(iface);
3077 TRACE("(%p)\n", This);
3079 if (!This->attr_count) return S_FALSE;
3081 if (This->attr)
3082 reader_dec_depth(This);
3084 This->attr = NULL;
3086 /* FIXME: support other node types with 'attributes' like DTD */
3087 if (This->is_empty_element) {
3088 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3089 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3091 else {
3092 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3093 if (element) {
3094 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3095 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3098 This->chunk_read_off = 0;
3099 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3101 return S_OK;
3104 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3106 xmlreader *This = impl_from_IXmlReader(iface);
3107 struct attribute *attribute = This->attr;
3108 struct element *element;
3109 UINT length;
3111 TRACE("(%p)->(%p %p)\n", This, name, len);
3113 if (!len)
3114 len = &length;
3116 switch (reader_get_nodetype(This))
3118 case XmlNodeType_Text:
3119 case XmlNodeType_CDATA:
3120 case XmlNodeType_Comment:
3121 case XmlNodeType_Whitespace:
3122 *name = emptyW;
3123 *len = 0;
3124 break;
3125 case XmlNodeType_Element:
3126 case XmlNodeType_EndElement:
3127 element = reader_get_element(This);
3128 if (element->prefix.len)
3130 *name = element->qname.str;
3131 *len = element->qname.len;
3133 else
3135 *name = element->localname.str;
3136 *len = element->localname.len;
3138 break;
3139 case XmlNodeType_Attribute:
3140 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3142 *name = xmlnsW;
3143 *len = 5;
3144 } else if (attribute->prefix.len)
3146 *name = This->strvalues[StringValue_QualifiedName].str;
3147 *len = This->strvalues[StringValue_QualifiedName].len;
3149 else
3151 *name = attribute->localname.str;
3152 *len = attribute->localname.len;
3154 break;
3155 default:
3156 *name = This->strvalues[StringValue_QualifiedName].str;
3157 *len = This->strvalues[StringValue_QualifiedName].len;
3158 break;
3161 return S_OK;
3164 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3166 if (list_empty(&reader->nsdef))
3167 return NULL;
3169 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3172 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3174 xmlreader *This = impl_from_IXmlReader(iface);
3175 const strval *prefix = &This->strvalues[StringValue_Prefix];
3176 XmlNodeType nodetype;
3177 struct ns *ns;
3178 UINT length;
3180 TRACE("(%p %p %p)\n", iface, uri, len);
3182 if (!len)
3183 len = &length;
3185 switch ((nodetype = reader_get_nodetype(This)))
3187 case XmlNodeType_Attribute:
3188 reader_get_attribute_ns_uri(This, This->attr, uri, len);
3189 break;
3190 case XmlNodeType_Element:
3191 case XmlNodeType_EndElement:
3193 ns = reader_lookup_ns(This, prefix);
3195 /* pick top default ns if any */
3196 if (!ns)
3197 ns = reader_lookup_nsdef(This);
3199 if (ns) {
3200 *uri = ns->uri.str;
3201 *len = ns->uri.len;
3203 else {
3204 *uri = emptyW;
3205 *len = 0;
3208 break;
3209 case XmlNodeType_Text:
3210 case XmlNodeType_CDATA:
3211 case XmlNodeType_ProcessingInstruction:
3212 case XmlNodeType_Comment:
3213 case XmlNodeType_Whitespace:
3214 case XmlNodeType_XmlDeclaration:
3215 *uri = emptyW;
3216 *len = 0;
3217 break;
3218 default:
3219 FIXME("Unhandled node type %d\n", nodetype);
3220 *uri = NULL;
3221 *len = 0;
3222 return E_NOTIMPL;
3225 return S_OK;
3228 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3230 xmlreader *This = impl_from_IXmlReader(iface);
3231 struct element *element;
3232 UINT length;
3234 TRACE("(%p)->(%p %p)\n", This, name, len);
3236 if (!len)
3237 len = &length;
3239 switch (reader_get_nodetype(This))
3241 case XmlNodeType_Text:
3242 case XmlNodeType_CDATA:
3243 case XmlNodeType_Comment:
3244 case XmlNodeType_Whitespace:
3245 *name = emptyW;
3246 *len = 0;
3247 break;
3248 case XmlNodeType_Element:
3249 case XmlNodeType_EndElement:
3250 element = reader_get_element(This);
3251 *name = element->localname.str;
3252 *len = element->localname.len;
3253 break;
3254 case XmlNodeType_Attribute:
3255 reader_get_attribute_local_name(This, This->attr, name, len);
3256 break;
3257 default:
3258 *name = This->strvalues[StringValue_LocalName].str;
3259 *len = This->strvalues[StringValue_LocalName].len;
3260 break;
3263 return S_OK;
3266 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3268 xmlreader *This = impl_from_IXmlReader(iface);
3269 XmlNodeType nodetype;
3270 UINT length;
3272 TRACE("(%p)->(%p %p)\n", This, ret, len);
3274 if (!len)
3275 len = &length;
3277 *ret = emptyW;
3278 *len = 0;
3280 switch ((nodetype = reader_get_nodetype(This)))
3282 case XmlNodeType_Element:
3283 case XmlNodeType_EndElement:
3284 case XmlNodeType_Attribute:
3286 const strval *prefix = &This->strvalues[StringValue_Prefix];
3287 struct ns *ns;
3289 if (strval_eq(This, prefix, &strval_xml))
3291 *ret = xmlW;
3292 *len = 3;
3294 else if (strval_eq(This, prefix, &strval_xmlns))
3296 *ret = xmlnsW;
3297 *len = 5;
3299 else if ((ns = reader_lookup_ns(This, prefix)))
3301 *ret = ns->prefix.str;
3302 *len = ns->prefix.len;
3305 break;
3307 default:
3311 return S_OK;
3314 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3316 strval *val;
3318 switch (reader_get_nodetype(reader))
3320 case XmlNodeType_XmlDeclaration:
3321 case XmlNodeType_EndElement:
3322 case XmlNodeType_None:
3323 return &strval_empty;
3324 case XmlNodeType_Attribute:
3325 /* For namespace definition attributes return values from namespace list */
3326 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3328 struct ns *ns;
3330 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3331 ns = reader_lookup_nsdef(reader);
3333 return &ns->uri;
3335 return &reader->attr->value;
3336 default:
3337 break;
3340 val = &reader->strvalues[StringValue_Value];
3341 if (!val->str && ensure_allocated)
3343 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3344 if (!ptr) return NULL;
3345 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3346 ptr[val->len] = 0;
3347 val->str = ptr;
3350 return val;
3353 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3355 xmlreader *reader = impl_from_IXmlReader(iface);
3356 const strval *val = &reader->strvalues[StringValue_Value];
3357 UINT off;
3359 TRACE("(%p)->(%p %p)\n", reader, value, len);
3361 *value = NULL;
3363 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3365 XmlNodeType type;
3366 HRESULT hr;
3368 hr = IXmlReader_Read(iface, &type);
3369 if (FAILED(hr)) return hr;
3371 /* return if still pending, partially read values are not reported */
3372 if (is_reader_pending(reader)) return E_PENDING;
3375 val = reader_get_value(reader, TRUE);
3376 if (!val)
3377 return E_OUTOFMEMORY;
3379 off = abs(reader->chunk_read_off);
3380 assert(off <= val->len);
3381 *value = val->str + off;
3382 if (len) *len = val->len - off;
3383 reader->chunk_read_off = -off;
3384 return S_OK;
3387 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3389 xmlreader *reader = impl_from_IXmlReader(iface);
3390 const strval *val;
3391 UINT len = 0;
3393 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3395 val = reader_get_value(reader, FALSE);
3397 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3398 if (reader->chunk_read_off >= 0)
3400 assert(reader->chunk_read_off <= val->len);
3401 len = min(val->len - reader->chunk_read_off, chunk_size);
3403 if (read) *read = len;
3405 if (len)
3407 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3408 reader->chunk_read_off += len;
3411 return len || !chunk_size ? S_OK : S_FALSE;
3414 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3415 LPCWSTR *baseUri,
3416 UINT *baseUri_length)
3418 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3419 return E_NOTIMPL;
3422 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3424 FIXME("(%p): stub\n", iface);
3425 return FALSE;
3428 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3430 xmlreader *This = impl_from_IXmlReader(iface);
3431 TRACE("(%p)\n", This);
3432 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3433 when current node is start tag of an element */
3434 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3437 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3439 xmlreader *This = impl_from_IXmlReader(iface);
3440 const struct element *element;
3442 TRACE("(%p %p)\n", This, line_number);
3444 if (!line_number)
3445 return E_INVALIDARG;
3447 switch (reader_get_nodetype(This))
3449 case XmlNodeType_Element:
3450 case XmlNodeType_EndElement:
3451 element = reader_get_element(This);
3452 *line_number = element->position.line_number;
3453 break;
3454 case XmlNodeType_Attribute:
3455 *line_number = This->attr->position.line_number;
3456 break;
3457 case XmlNodeType_Whitespace:
3458 case XmlNodeType_XmlDeclaration:
3459 *line_number = This->empty_element.position.line_number;
3460 break;
3461 default:
3462 *line_number = This->position.line_number;
3463 break;
3466 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3469 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3471 xmlreader *This = impl_from_IXmlReader(iface);
3472 const struct element *element;
3474 TRACE("(%p %p)\n", This, line_position);
3476 if (!line_position)
3477 return E_INVALIDARG;
3479 switch (reader_get_nodetype(This))
3481 case XmlNodeType_Element:
3482 case XmlNodeType_EndElement:
3483 element = reader_get_element(This);
3484 *line_position = element->position.line_position;
3485 break;
3486 case XmlNodeType_Attribute:
3487 *line_position = This->attr->position.line_position;
3488 break;
3489 case XmlNodeType_Whitespace:
3490 case XmlNodeType_XmlDeclaration:
3491 *line_position = This->empty_element.position.line_position;
3492 break;
3493 default:
3494 *line_position = This->position.line_position;
3495 break;
3498 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3501 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3503 xmlreader *This = impl_from_IXmlReader(iface);
3505 TRACE("(%p)->(%p)\n", This, count);
3507 if (!count) return E_INVALIDARG;
3509 *count = This->attr_count;
3510 return S_OK;
3513 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3515 xmlreader *This = impl_from_IXmlReader(iface);
3516 TRACE("(%p)->(%p)\n", This, depth);
3517 *depth = This->depth;
3518 return S_OK;
3521 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3523 xmlreader *This = impl_from_IXmlReader(iface);
3524 TRACE("(%p)\n", iface);
3525 return This->state == XmlReadState_EndOfFile;
3528 static const struct IXmlReaderVtbl xmlreader_vtbl =
3530 xmlreader_QueryInterface,
3531 xmlreader_AddRef,
3532 xmlreader_Release,
3533 xmlreader_SetInput,
3534 xmlreader_GetProperty,
3535 xmlreader_SetProperty,
3536 xmlreader_Read,
3537 xmlreader_GetNodeType,
3538 xmlreader_MoveToFirstAttribute,
3539 xmlreader_MoveToNextAttribute,
3540 xmlreader_MoveToAttributeByName,
3541 xmlreader_MoveToElement,
3542 xmlreader_GetQualifiedName,
3543 xmlreader_GetNamespaceUri,
3544 xmlreader_GetLocalName,
3545 xmlreader_GetPrefix,
3546 xmlreader_GetValue,
3547 xmlreader_ReadValueChunk,
3548 xmlreader_GetBaseUri,
3549 xmlreader_IsDefault,
3550 xmlreader_IsEmptyElement,
3551 xmlreader_GetLineNumber,
3552 xmlreader_GetLinePosition,
3553 xmlreader_GetAttributeCount,
3554 xmlreader_GetDepth,
3555 xmlreader_IsEOF
3558 /** IXmlReaderInput **/
3559 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3561 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3563 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3565 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3566 IsEqualGUID(riid, &IID_IUnknown))
3568 *ppvObject = iface;
3570 else
3572 WARN("interface %s not implemented\n", debugstr_guid(riid));
3573 *ppvObject = NULL;
3574 return E_NOINTERFACE;
3577 IUnknown_AddRef(iface);
3579 return S_OK;
3582 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3584 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3585 ULONG ref = InterlockedIncrement(&This->ref);
3586 TRACE("(%p)->(%d)\n", This, ref);
3587 return ref;
3590 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3592 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3593 LONG ref = InterlockedDecrement(&This->ref);
3595 TRACE("(%p)->(%d)\n", This, ref);
3597 if (ref == 0)
3599 IMalloc *imalloc = This->imalloc;
3600 if (This->input) IUnknown_Release(This->input);
3601 if (This->stream) ISequentialStream_Release(This->stream);
3602 if (This->buffer) free_input_buffer(This->buffer);
3603 readerinput_free(This, This->baseuri);
3604 readerinput_free(This, This);
3605 if (imalloc) IMalloc_Release(imalloc);
3608 return ref;
3611 static const struct IUnknownVtbl xmlreaderinputvtbl =
3613 xmlreaderinput_QueryInterface,
3614 xmlreaderinput_AddRef,
3615 xmlreaderinput_Release
3618 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3620 xmlreader *reader;
3621 HRESULT hr;
3622 int i;
3624 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3626 if (imalloc)
3627 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3628 else
3629 reader = heap_alloc(sizeof(*reader));
3630 if (!reader)
3631 return E_OUTOFMEMORY;
3633 memset(reader, 0, sizeof(*reader));
3634 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3635 reader->ref = 1;
3636 reader->state = XmlReadState_Closed;
3637 reader->instate = XmlReadInState_Initial;
3638 reader->resumestate = XmlReadResumeState_Initial;
3639 reader->dtdmode = DtdProcessing_Prohibit;
3640 reader->imalloc = imalloc;
3641 if (imalloc) IMalloc_AddRef(imalloc);
3642 reader->nodetype = XmlNodeType_None;
3643 list_init(&reader->attrs);
3644 list_init(&reader->nsdef);
3645 list_init(&reader->ns);
3646 list_init(&reader->elements);
3647 reader->max_depth = 256;
3649 reader->chunk_read_off = 0;
3650 for (i = 0; i < StringValue_Last; i++)
3651 reader->strvalues[i] = strval_empty;
3653 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3654 IXmlReader_Release(&reader->IXmlReader_iface);
3656 TRACE("returning iface %p, hr %#x\n", *obj, hr);
3658 return hr;
3661 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3662 IMalloc *imalloc,
3663 LPCWSTR encoding,
3664 BOOL hint,
3665 LPCWSTR base_uri,
3666 IXmlReaderInput **ppInput)
3668 xmlreaderinput *readerinput;
3669 HRESULT hr;
3671 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3672 hint, wine_dbgstr_w(base_uri), ppInput);
3674 if (!stream || !ppInput) return E_INVALIDARG;
3676 if (imalloc)
3677 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3678 else
3679 readerinput = heap_alloc(sizeof(*readerinput));
3680 if(!readerinput) return E_OUTOFMEMORY;
3682 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3683 readerinput->ref = 1;
3684 readerinput->imalloc = imalloc;
3685 readerinput->stream = NULL;
3686 if (imalloc) IMalloc_AddRef(imalloc);
3687 readerinput->encoding = parse_encoding_name(encoding, -1);
3688 readerinput->hint = hint;
3689 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3690 readerinput->pending = 0;
3692 hr = alloc_input_buffer(readerinput);
3693 if (hr != S_OK)
3695 readerinput_free(readerinput, readerinput->baseuri);
3696 readerinput_free(readerinput, readerinput);
3697 if (imalloc) IMalloc_Release(imalloc);
3698 return hr;
3700 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3702 *ppInput = &readerinput->IXmlReaderInput_iface;
3704 TRACE("returning iface %p\n", *ppInput);
3706 return S_OK;