xmllite: Don't lose terminating character when shrinking buffer.
[wine.git] / dlls / xmllite / reader.c
blob834c36ae18c6367b2741349babc7eae7b7b7d435
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal parsing state of the reader, kept in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records the point at which parsing was interrupted by an input problem,
   so the reader can pick up from there once more data is available. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Indices into xmlreader.resume[]: saved input offsets used to resume
   parsing from a particular position. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last
} XmlReaderResume;
/* Indices into xmlreader.strvalues[]; StringValue_Last is the array size. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
86 BOOL is_namestartchar(WCHAR ch);
88 static const char *debugstr_nodetype(XmlNodeType nodetype)
90 static const char * const type_names[] =
92 "None",
93 "Element",
94 "Attribute",
95 "Text",
96 "CDATA",
97 "",
98 "",
99 "ProcessingInstruction",
100 "Comment",
102 "DocumentType",
105 "Whitespace",
107 "EndElement",
109 "XmlDeclaration"
112 if (nodetype > _XmlNodeType_Last)
113 return wine_dbg_sprintf("unknown type=%d", nodetype);
115 return type_names[nodetype];
118 static const char *debugstr_reader_prop(XmlReaderProperty prop)
120 static const char * const prop_names[] =
122 "MultiLanguage",
123 "ConformanceLevel",
124 "RandomAccess",
125 "XmlResolver",
126 "DtdProcessing",
127 "ReadState",
128 "MaxElementDepth",
129 "MaxEntityExpansion"
132 if (prop > _XmlReaderProperty_Last)
133 return wine_dbg_sprintf("unknown property=%d", prop);
135 return prop_names[prop];
138 struct xml_encoding_data
140 const WCHAR *name;
141 xml_encoding enc;
142 UINT cp;
145 static const struct xml_encoding_data xml_encoding_map[] =
147 { L"US-ASCII", XmlEncoding_USASCII, 20127 },
148 { L"UTF-16", XmlEncoding_UTF16, 1200 },
149 { L"UTF-8", XmlEncoding_UTF8, CP_UTF8 },
152 const WCHAR *get_encoding_name(xml_encoding encoding)
154 return xml_encoding_map[encoding].name;
157 xml_encoding get_encoding_from_codepage(UINT codepage)
159 int i;
160 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
162 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
164 return XmlEncoding_Unknown;
167 typedef struct
169 char *data;
170 UINT cur;
171 unsigned int allocated;
172 unsigned int written;
173 BOOL prev_cr;
174 } encoded_buffer;
176 typedef struct input_buffer input_buffer;
178 typedef struct
180 IXmlReaderInput IXmlReaderInput_iface;
181 LONG ref;
182 /* reference passed on IXmlReaderInput creation, is kept when input is created */
183 IUnknown *input;
184 IMalloc *imalloc;
185 xml_encoding encoding;
186 BOOL hint;
187 WCHAR *baseuri;
188 /* stream reference set after SetInput() call from reader,
189 stored as sequential stream, cause currently
190 optimizations possible with IStream aren't implemented */
191 ISequentialStream *stream;
192 input_buffer *buffer;
193 unsigned int pending : 1;
194 } xmlreaderinput;
196 static const struct IUnknownVtbl xmlreaderinputvtbl;
198 /* Structure to hold parsed string of specific length.
200 Reader stores node value as 'start' pointer, on request
201 a null-terminated version of it is allocated.
203 To init a strval variable use reader_init_strval(),
204 to set strval as a reader value use reader_set_strval().
206 typedef struct
208 WCHAR *str; /* allocated null-terminated string */
209 UINT len; /* length in WCHARs, altered after ReadValueChunk */
210 UINT start; /* input position where value starts */
211 } strval;
213 static WCHAR emptyW[] = L"";
214 static WCHAR xmlW[] = L"xml";
215 static WCHAR xmlnsW[] = L"xmlns";
216 static const strval strval_empty = { emptyW, 0 };
217 static const strval strval_xml = { xmlW, 3 };
218 static const strval strval_xmlns = { xmlnsW, 5 };
220 struct reader_position
222 UINT line_number;
223 UINT line_position;
/* Bit flags stored in attribute.flags. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
232 struct attribute
234 struct list entry;
235 strval prefix;
236 strval localname;
237 strval qname;
238 strval value;
239 struct reader_position position;
240 unsigned int flags;
243 struct element
245 struct list entry;
246 strval prefix;
247 strval localname;
248 strval qname;
249 struct reader_position position;
252 struct ns
254 struct list entry;
255 strval prefix;
256 strval uri;
257 struct element *element;
260 typedef struct
262 IXmlReader IXmlReader_iface;
263 LONG ref;
264 xmlreaderinput *input;
265 IMalloc *imalloc;
266 XmlReadState state;
267 HRESULT error; /* error set on XmlReadState_Error */
268 XmlReaderInternalState instate;
269 XmlReaderResumeState resumestate;
270 XmlNodeType nodetype;
271 DtdProcessing dtdmode;
272 IXmlResolver *resolver;
273 IUnknown *mlang;
274 struct reader_position position;
275 struct list attrs; /* attributes list for current node */
276 struct attribute *attr; /* current attribute */
277 UINT attr_count;
278 struct list nsdef;
279 struct list ns;
280 struct list elements;
281 int chunk_read_off;
282 strval strvalues[StringValue_Last];
283 UINT depth;
284 UINT max_depth;
285 BOOL is_empty_element;
286 struct element empty_element; /* used for empty elements without end tag <a />,
287 and to keep <?xml reader position */
288 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
289 } xmlreader;
291 struct input_buffer
293 encoded_buffer utf16;
294 encoded_buffer encoded;
295 UINT code_page;
296 xmlreaderinput *input;
299 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
301 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
304 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
306 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
309 /* reader memory allocation functions */
310 static inline void *reader_alloc(xmlreader *reader, size_t len)
312 return m_alloc(reader->imalloc, len);
315 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
317 void *ret = reader_alloc(reader, len);
318 if (ret)
319 memset(ret, 0, len);
320 return ret;
323 static inline void reader_free(xmlreader *reader, void *mem)
325 m_free(reader->imalloc, mem);
328 /* Just return pointer from offset, no attempt to read more. */
329 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
331 encoded_buffer *buffer = &reader->input->buffer->utf16;
332 return (WCHAR*)buffer->data + offset;
335 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
337 return v->str ? v->str : reader_get_ptr2(reader, v->start);
340 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
342 *dest = *src;
344 if (src->str != strval_empty.str)
346 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
347 if (!dest->str) return E_OUTOFMEMORY;
348 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
349 dest->str[dest->len] = 0;
350 dest->start = 0;
353 return S_OK;
356 /* reader input memory allocation functions */
357 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
359 return m_alloc(input->imalloc, len);
362 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
364 return m_realloc(input->imalloc, mem, len);
367 static inline void readerinput_free(xmlreaderinput *input, void *mem)
369 m_free(input->imalloc, mem);
372 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
374 LPWSTR ret = NULL;
376 if(str) {
377 DWORD size;
379 size = (lstrlenW(str)+1)*sizeof(WCHAR);
380 ret = readerinput_alloc(input, size);
381 if (ret) memcpy(ret, str, size);
384 return ret;
387 /* This one frees stored string value if needed */
388 static void reader_free_strvalued(xmlreader *reader, strval *v)
390 if (v->str != strval_empty.str)
392 reader_free(reader, v->str);
393 *v = strval_empty;
397 static void reader_clear_attrs(xmlreader *reader)
399 struct attribute *attr, *attr2;
400 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
402 reader_free_strvalued(reader, &attr->localname);
403 reader_free_strvalued(reader, &attr->value);
404 reader_free(reader, attr);
406 list_init(&reader->attrs);
407 reader->attr_count = 0;
408 reader->attr = NULL;
411 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
412 while we are on a node with attributes */
413 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
414 strval *value, const struct reader_position *position, unsigned int flags)
416 struct attribute *attr;
417 HRESULT hr;
419 attr = reader_alloc(reader, sizeof(*attr));
420 if (!attr) return E_OUTOFMEMORY;
422 hr = reader_strvaldup(reader, localname, &attr->localname);
423 if (hr == S_OK)
425 hr = reader_strvaldup(reader, value, &attr->value);
426 if (hr != S_OK)
427 reader_free_strvalued(reader, &attr->value);
429 if (hr != S_OK)
431 reader_free(reader, attr);
432 return hr;
435 if (prefix)
436 attr->prefix = *prefix;
437 else
438 memset(&attr->prefix, 0, sizeof(attr->prefix));
439 attr->qname = qname ? *qname : *localname;
440 attr->position = *position;
441 attr->flags = flags;
442 list_add_tail(&reader->attrs, &attr->entry);
443 reader->attr_count++;
445 return S_OK;
448 /* Returns current element, doesn't check if reader is actually positioned on it. */
449 static struct element *reader_get_element(xmlreader *reader)
451 if (reader->is_empty_element)
452 return &reader->empty_element;
454 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
457 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
459 v->start = start;
460 v->len = len;
461 v->str = NULL;
464 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
466 return debugstr_wn(reader_get_strptr(reader, v), v->len);
469 /* used to initialize from constant string */
470 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
472 v->start = 0;
473 v->len = len;
474 v->str = str;
477 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
479 reader_free_strvalued(reader, &reader->strvalues[type]);
482 static void reader_free_strvalues(xmlreader *reader)
484 int type;
485 for (type = 0; type < StringValue_Last; type++)
486 reader_free_strvalue(reader, type);
489 /* This helper should only be used to test if strings are the same,
490 it doesn't try to sort. */
491 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
493 if (str1->len != str2->len) return 0;
494 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
497 static void reader_clear_elements(xmlreader *reader)
499 struct element *elem, *elem2;
500 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
502 reader_free_strvalued(reader, &elem->prefix);
503 reader_free_strvalued(reader, &elem->localname);
504 reader_free_strvalued(reader, &elem->qname);
505 reader_free(reader, elem);
507 list_init(&reader->elements);
508 reader_free_strvalued(reader, &reader->empty_element.localname);
509 reader_free_strvalued(reader, &reader->empty_element.qname);
510 reader->is_empty_element = FALSE;
513 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
515 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
516 struct ns *ns;
518 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
519 if (strval_eq(reader, prefix, &ns->prefix))
520 return ns;
523 return NULL;
526 static HRESULT reader_inc_depth(xmlreader *reader)
528 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
531 static void reader_dec_depth(xmlreader *reader)
533 if (reader->depth)
534 reader->depth--;
537 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
539 struct ns *ns;
540 HRESULT hr;
542 ns = reader_alloc(reader, sizeof(*ns));
543 if (!ns) return E_OUTOFMEMORY;
545 if (def)
546 memset(&ns->prefix, 0, sizeof(ns->prefix));
547 else {
548 hr = reader_strvaldup(reader, prefix, &ns->prefix);
549 if (FAILED(hr)) {
550 reader_free(reader, ns);
551 return hr;
555 hr = reader_strvaldup(reader, uri, &ns->uri);
556 if (FAILED(hr)) {
557 reader_free_strvalued(reader, &ns->prefix);
558 reader_free(reader, ns);
559 return hr;
562 ns->element = NULL;
563 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
564 return hr;
567 static void reader_free_element(xmlreader *reader, struct element *element)
569 reader_free_strvalued(reader, &element->prefix);
570 reader_free_strvalued(reader, &element->localname);
571 reader_free_strvalued(reader, &element->qname);
572 reader_free(reader, element);
575 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
577 struct ns *ns;
579 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
580 if (ns->element)
581 break;
582 ns->element = element;
585 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
586 if (ns->element)
587 break;
588 ns->element = element;
592 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
593 strval *qname, const struct reader_position *position)
595 struct element *element;
596 HRESULT hr;
598 element = reader_alloc_zero(reader, sizeof(*element));
599 if (!element)
600 return E_OUTOFMEMORY;
602 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
603 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
604 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
606 list_add_head(&reader->elements, &element->entry);
607 reader_mark_ns_nodes(reader, element);
608 reader->is_empty_element = FALSE;
609 element->position = *position;
611 else
612 reader_free_element(reader, element);
614 return hr;
617 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
619 struct ns *ns, *ns2;
621 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
622 if (ns->element != element)
623 break;
625 list_remove(&ns->entry);
626 reader_free_strvalued(reader, &ns->prefix);
627 reader_free_strvalued(reader, &ns->uri);
628 reader_free(reader, ns);
631 if (!list_empty(&reader->nsdef)) {
632 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
633 if (ns->element == element) {
634 list_remove(&ns->entry);
635 reader_free_strvalued(reader, &ns->prefix);
636 reader_free_strvalued(reader, &ns->uri);
637 reader_free(reader, ns);
642 static void reader_pop_element(xmlreader *reader)
644 struct element *element;
646 if (list_empty(&reader->elements))
647 return;
649 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
650 list_remove(&element->entry);
652 reader_pop_ns_nodes(reader, element);
653 reader_free_element(reader, element);
655 /* It was a root element, the rest is expected as Misc */
656 if (list_empty(&reader->elements))
657 reader->instate = XmlReadInState_MiscEnd;
660 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
661 means node value is to be determined. */
662 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
664 strval *v = &reader->strvalues[type];
666 reader_free_strvalue(reader, type);
667 if (!value)
669 v->str = NULL;
670 v->start = 0;
671 v->len = 0;
672 return;
675 if (value->str == strval_empty.str)
676 *v = *value;
677 else
679 if (type == StringValue_Value)
681 /* defer allocation for value string */
682 v->str = NULL;
683 v->start = value->start;
684 v->len = value->len;
686 else
688 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
689 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
690 v->str[value->len] = 0;
691 v->len = value->len;
696 static inline int is_reader_pending(xmlreader *reader)
698 return reader->input->pending;
701 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
703 const int initial_len = 0x2000;
704 buffer->data = readerinput_alloc(input, initial_len);
705 if (!buffer->data) return E_OUTOFMEMORY;
707 memset(buffer->data, 0, 4);
708 buffer->cur = 0;
709 buffer->allocated = initial_len;
710 buffer->written = 0;
711 buffer->prev_cr = FALSE;
713 return S_OK;
716 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
718 readerinput_free(input, buffer->data);
721 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
723 if (encoding == XmlEncoding_Unknown)
725 FIXME("unsupported encoding %d\n", encoding);
726 return E_NOTIMPL;
729 *cp = xml_encoding_map[encoding].cp;
731 return S_OK;
734 xml_encoding parse_encoding_name(const WCHAR *name, int len)
736 int min, max, n, c;
738 if (!name) return XmlEncoding_Unknown;
740 min = 0;
741 max = ARRAY_SIZE(xml_encoding_map) - 1;
743 while (min <= max)
745 n = (min+max)/2;
747 if (len != -1)
748 c = wcsnicmp(xml_encoding_map[n].name, name, len);
749 else
750 c = wcsicmp(xml_encoding_map[n].name, name);
751 if (!c)
752 return xml_encoding_map[n].enc;
754 if (c > 0)
755 max = n-1;
756 else
757 min = n+1;
760 return XmlEncoding_Unknown;
763 static HRESULT alloc_input_buffer(xmlreaderinput *input)
765 input_buffer *buffer;
766 HRESULT hr;
768 input->buffer = NULL;
770 buffer = readerinput_alloc(input, sizeof(*buffer));
771 if (!buffer) return E_OUTOFMEMORY;
773 buffer->input = input;
774 buffer->code_page = ~0; /* code page is unknown at this point */
775 hr = init_encoded_buffer(input, &buffer->utf16);
776 if (hr != S_OK) {
777 readerinput_free(input, buffer);
778 return hr;
781 hr = init_encoded_buffer(input, &buffer->encoded);
782 if (hr != S_OK) {
783 free_encoded_buffer(input, &buffer->utf16);
784 readerinput_free(input, buffer);
785 return hr;
788 input->buffer = buffer;
789 return S_OK;
792 static void free_input_buffer(input_buffer *buffer)
794 free_encoded_buffer(buffer->input, &buffer->encoded);
795 free_encoded_buffer(buffer->input, &buffer->utf16);
796 readerinput_free(buffer->input, buffer);
799 static void readerinput_release_stream(xmlreaderinput *readerinput)
801 if (readerinput->stream) {
802 ISequentialStream_Release(readerinput->stream);
803 readerinput->stream = NULL;
807 /* Queries already stored interface for IStream/ISequentialStream.
808 Interface supplied on creation will be overwritten */
809 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
811 HRESULT hr;
813 readerinput_release_stream(readerinput);
814 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
815 if (hr != S_OK)
816 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
818 return hr;
821 /* reads a chunk to raw buffer */
822 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
824 encoded_buffer *buffer = &readerinput->buffer->encoded;
825 /* to make sure aligned length won't exceed allocated length */
826 ULONG len = buffer->allocated - buffer->written - 4;
827 ULONG read;
828 HRESULT hr;
830 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
831 variable width encodings like UTF-8 */
832 len = (len + 3) & ~3;
833 /* try to use allocated space or grow */
834 if (buffer->allocated - buffer->written < len)
836 buffer->allocated *= 2;
837 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
838 len = buffer->allocated - buffer->written;
841 read = 0;
842 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
843 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
844 readerinput->pending = hr == E_PENDING;
845 if (FAILED(hr)) return hr;
846 buffer->written += read;
847 if (!buffer->written)
848 return MX_E_INPUTEND;
850 return hr;
853 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
854 static void readerinput_grow(xmlreaderinput *readerinput, int length)
856 encoded_buffer *buffer = &readerinput->buffer->utf16;
858 length *= sizeof(WCHAR);
859 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
860 if (buffer->allocated < buffer->written + length + 4)
862 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
863 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
864 buffer->allocated = grown_size;
868 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
870 static const char startA[] = {'<','?'};
871 static const char commentA[] = {'<','!'};
872 encoded_buffer *buffer = &readerinput->buffer->encoded;
873 unsigned char *ptr = (unsigned char*)buffer->data;
875 return !memcmp(buffer->data, startA, sizeof(startA)) ||
876 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
877 /* test start byte */
878 (ptr[0] == '<' &&
880 (ptr[1] && (ptr[1] <= 0x7f)) ||
881 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
882 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
883 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
887 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
889 encoded_buffer *buffer = &readerinput->buffer->encoded;
890 static const char utf8bom[] = {0xef,0xbb,0xbf};
891 static const char utf16lebom[] = {0xff,0xfe};
892 WCHAR *ptrW;
894 *enc = XmlEncoding_Unknown;
896 if (buffer->written <= 3)
898 HRESULT hr = readerinput_growraw(readerinput);
899 if (FAILED(hr)) return hr;
900 if (buffer->written < 3) return MX_E_INPUTEND;
903 ptrW = (WCHAR *)buffer->data;
904 /* try start symbols if we have enough data to do that, input buffer should contain
905 first chunk already */
906 if (readerinput_is_utf8(readerinput))
907 *enc = XmlEncoding_UTF8;
908 else if (*ptrW == '<')
910 ptrW++;
911 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
912 *enc = XmlEncoding_UTF16;
914 /* try with BOM now */
915 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
917 buffer->cur += sizeof(utf8bom);
918 *enc = XmlEncoding_UTF8;
920 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
922 buffer->cur += sizeof(utf16lebom);
923 *enc = XmlEncoding_UTF16;
926 return S_OK;
929 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
931 encoded_buffer *buffer = &readerinput->buffer->encoded;
932 int len = buffer->written;
934 assert(len);
936 /* complete single byte char */
937 if (!(buffer->data[len-1] & 0x80)) return len;
939 /* find start byte of multibyte char */
940 while (--len && !(buffer->data[len] & 0xc0))
943 return len;
946 /* Returns byte length of complete char sequence for buffer code page,
947 it's relative to current buffer position which is currently used for BOM handling
948 only. */
949 static int readerinput_get_convlen(xmlreaderinput *readerinput)
951 encoded_buffer *buffer = &readerinput->buffer->encoded;
952 int len;
954 if (readerinput->buffer->code_page == CP_UTF8)
955 len = readerinput_get_utf8_convlen(readerinput);
956 else
957 len = buffer->written;
959 TRACE("%d\n", len - buffer->cur);
960 return len - buffer->cur;
963 /* It's possible that raw buffer has some leftovers from last conversion - some char
964 sequence that doesn't represent a full code point. Length argument should be calculated with
965 readerinput_get_convlen(), if it's -1 it will be calculated here. */
966 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
968 encoded_buffer *buffer = &readerinput->buffer->encoded;
970 if (len == -1)
971 len = readerinput_get_convlen(readerinput);
973 assert(len >= 0);
974 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
975 /* everything below cur is lost too */
976 buffer->written -= len + buffer->cur;
977 /* after this point we don't need cur offset really,
978 it's used only to mark where actual data begins when first chunk is read */
979 buffer->cur = 0;
982 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
984 BOOL prev_cr = buffer->prev_cr;
985 const WCHAR *src;
986 WCHAR *dest;
988 src = dest = (WCHAR*)buffer->data + off;
989 while ((const char*)src < buffer->data + buffer->written)
991 if (*src == '\r')
993 *dest++ = '\n';
994 src++;
995 prev_cr = TRUE;
996 continue;
998 if(prev_cr && *src == '\n')
999 src++;
1000 else
1001 *dest++ = *src++;
1002 prev_cr = FALSE;
1005 buffer->written = (char*)dest - buffer->data;
1006 buffer->prev_cr = prev_cr;
1007 *dest = 0;
1010 /* note that raw buffer content is kept */
1011 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1013 encoded_buffer *src = &readerinput->buffer->encoded;
1014 encoded_buffer *dest = &readerinput->buffer->utf16;
1015 int len, dest_len;
1016 UINT cp = ~0u;
1017 HRESULT hr;
1018 WCHAR *ptr;
1020 hr = get_code_page(enc, &cp);
1021 if (FAILED(hr)) return;
1023 readerinput->buffer->code_page = cp;
1024 len = readerinput_get_convlen(readerinput);
1026 TRACE("switching to cp %d\n", cp);
1028 /* just copy in this case */
1029 if (enc == XmlEncoding_UTF16)
1031 readerinput_grow(readerinput, len);
1032 memcpy(dest->data, src->data + src->cur, len);
1033 dest->written += len*sizeof(WCHAR);
1035 else
1037 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1038 readerinput_grow(readerinput, dest_len);
1039 ptr = (WCHAR*)dest->data;
1040 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1041 ptr[dest_len] = 0;
1042 dest->written += dest_len*sizeof(WCHAR);
1045 fixup_buffer_cr(dest, 0);
1048 /* shrinks parsed data a buffer begins with */
1049 static void reader_shrink(xmlreader *reader)
1051 encoded_buffer *buffer = &reader->input->buffer->utf16;
1053 /* avoid to move too often using threshold shrink length */
1054 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1056 buffer->written -= buffer->cur*sizeof(WCHAR);
1057 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1058 buffer->cur = 0;
1059 *(WCHAR*)&buffer->data[buffer->written] = 0;
1063 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1064 It won't attempt to shrink but will grow destination buffer if needed */
1065 static HRESULT reader_more(xmlreader *reader)
1067 xmlreaderinput *readerinput = reader->input;
1068 encoded_buffer *src = &readerinput->buffer->encoded;
1069 encoded_buffer *dest = &readerinput->buffer->utf16;
1070 UINT cp = readerinput->buffer->code_page;
1071 int len, dest_len, prev_len;
1072 HRESULT hr;
1073 WCHAR *ptr;
1075 /* get some raw data from stream first */
1076 if (FAILED(hr = readerinput_growraw(readerinput)))
1077 return hr;
1079 len = readerinput_get_convlen(readerinput);
1080 prev_len = dest->written / sizeof(WCHAR);
1082 /* just copy for UTF-16 case */
1083 if (cp == 1200)
1085 readerinput_grow(readerinput, len);
1086 memcpy(dest->data + dest->written, src->data + src->cur, len);
1087 dest->written += len*sizeof(WCHAR);
1089 else
1091 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1092 readerinput_grow(readerinput, dest_len);
1093 ptr = (WCHAR*)(dest->data + dest->written);
1094 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1095 ptr[dest_len] = 0;
1096 dest->written += dest_len*sizeof(WCHAR);
1097 /* get rid of processed data */
1098 readerinput_shrinkraw(readerinput, len);
1101 fixup_buffer_cr(dest, prev_len);
1102 return hr;
1105 static inline UINT reader_get_cur(xmlreader *reader)
1107 return reader->input->buffer->utf16.cur;
1110 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1112 encoded_buffer *buffer = &reader->input->buffer->utf16;
1113 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1114 if (!*ptr) reader_more(reader);
1115 return (WCHAR*)buffer->data + buffer->cur;
1118 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1120 int i=0;
1121 const WCHAR *ptr = reader_get_ptr(reader);
1122 while (str[i])
1124 if (!ptr[i])
1126 reader_more(reader);
1127 ptr = reader_get_ptr(reader);
1129 if (str[i] != ptr[i])
1130 return ptr[i] - str[i];
1131 i++;
1133 return 0;
1136 static void reader_update_position(xmlreader *reader, WCHAR ch)
1138 if (ch == '\r')
1139 reader->position.line_position = 1;
1140 else if (ch == '\n')
1142 reader->position.line_number++;
1143 reader->position.line_position = 1;
1145 else
1146 reader->position.line_position++;
1149 /* moves cursor n WCHARs forward */
1150 static void reader_skipn(xmlreader *reader, int n)
1152 encoded_buffer *buffer = &reader->input->buffer->utf16;
1153 const WCHAR *ptr;
1155 while (*(ptr = reader_get_ptr(reader)) && n--)
1157 reader_update_position(reader, *ptr);
1158 buffer->cur++;
1162 static inline BOOL is_wchar_space(WCHAR ch)
1164 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1167 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1168 static int reader_skipspaces(xmlreader *reader)
1170 const WCHAR *ptr = reader_get_ptr(reader);
1171 UINT start = reader_get_cur(reader);
1173 while (is_wchar_space(*ptr))
1175 reader_skipn(reader, 1);
1176 ptr = reader_get_ptr(reader);
1179 return reader_get_cur(reader) - start;
1182 /* [26] VersionNum ::= '1.' [0-9]+ */
1183 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1185 WCHAR *ptr, *ptr2;
1186 UINT start;
1188 if (reader_cmp(reader, L"1.")) return WC_E_XMLDECL;
1190 start = reader_get_cur(reader);
1191 /* skip "1." */
1192 reader_skipn(reader, 2);
1194 ptr2 = ptr = reader_get_ptr(reader);
1195 while (*ptr >= '0' && *ptr <= '9')
1197 reader_skipn(reader, 1);
1198 ptr = reader_get_ptr(reader);
1201 if (ptr2 == ptr) return WC_E_DIGIT;
1202 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1203 TRACE("version=%s\n", debug_strval(reader, val));
1204 return S_OK;
1207 /* [25] Eq ::= S? '=' S? */
1208 static HRESULT reader_parse_eq(xmlreader *reader)
1210 reader_skipspaces(reader);
1211 if (reader_cmp(reader, L"=")) return WC_E_EQUAL;
1212 /* skip '=' */
1213 reader_skipn(reader, 1);
1214 reader_skipspaces(reader);
1215 return S_OK;
1218 static BOOL reader_is_quote(xmlreader *reader)
1220 return !reader_cmp(reader, L"\'") || !reader_cmp(reader, L"\"");
1223 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1224 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1226 struct reader_position position;
1227 strval val, name;
1228 HRESULT hr;
1230 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1232 position = reader->position;
1233 if (reader_cmp(reader, L"version")) return WC_E_XMLDECL;
1234 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1235 /* skip 'version' */
1236 reader_skipn(reader, 7);
1238 hr = reader_parse_eq(reader);
1239 if (FAILED(hr)) return hr;
1241 if (!reader_is_quote(reader))
1242 return WC_E_QUOTE;
1243 /* skip "'"|'"' */
1244 reader_skipn(reader, 1);
1246 hr = reader_parse_versionnum(reader, &val);
1247 if (FAILED(hr)) return hr;
1249 if (!reader_is_quote(reader))
1250 return WC_E_QUOTE;
1252 /* skip "'"|'"' */
1253 reader_skipn(reader, 1);
1255 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1258 /* ([A-Za-z0-9._] | '-') */
1259 static inline BOOL is_wchar_encname(WCHAR ch)
1261 return ((ch >= 'A' && ch <= 'Z') ||
1262 (ch >= 'a' && ch <= 'z') ||
1263 (ch >= '0' && ch <= '9') ||
1264 (ch == '.') || (ch == '_') ||
1265 (ch == '-'));
1268 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1269 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1271 WCHAR *start = reader_get_ptr(reader), *ptr;
1272 xml_encoding enc;
1273 int len;
1275 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1276 return WC_E_ENCNAME;
1278 val->start = reader_get_cur(reader);
1280 ptr = start;
1281 while (is_wchar_encname(*++ptr))
1284 len = ptr - start;
1285 enc = parse_encoding_name(start, len);
1286 TRACE("encoding name %s\n", debugstr_wn(start, len));
1287 val->str = start;
1288 val->len = len;
1290 if (enc == XmlEncoding_Unknown)
1291 return WC_E_ENCNAME;
1293 /* skip encoding name */
1294 reader_skipn(reader, len);
1295 return S_OK;
1298 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1299 static HRESULT reader_parse_encdecl(xmlreader *reader)
1301 struct reader_position position;
1302 strval name, val;
1303 HRESULT hr;
1305 if (!reader_skipspaces(reader)) return S_FALSE;
1307 position = reader->position;
1308 if (reader_cmp(reader, L"encoding")) return S_FALSE;
1309 name.str = reader_get_ptr(reader);
1310 name.start = reader_get_cur(reader);
1311 name.len = 8;
1312 /* skip 'encoding' */
1313 reader_skipn(reader, 8);
1315 hr = reader_parse_eq(reader);
1316 if (FAILED(hr)) return hr;
1318 if (!reader_is_quote(reader))
1319 return WC_E_QUOTE;
1320 /* skip "'"|'"' */
1321 reader_skipn(reader, 1);
1323 hr = reader_parse_encname(reader, &val);
1324 if (FAILED(hr)) return hr;
1326 if (!reader_is_quote(reader))
1327 return WC_E_QUOTE;
1329 /* skip "'"|'"' */
1330 reader_skipn(reader, 1);
1332 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1335 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1336 static HRESULT reader_parse_sddecl(xmlreader *reader)
1338 struct reader_position position;
1339 strval name, val;
1340 UINT start;
1341 HRESULT hr;
1343 if (!reader_skipspaces(reader)) return S_FALSE;
1345 position = reader->position;
1346 if (reader_cmp(reader, L"standalone")) return S_FALSE;
1347 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1348 /* skip 'standalone' */
1349 reader_skipn(reader, 10);
1351 hr = reader_parse_eq(reader);
1352 if (FAILED(hr)) return hr;
1354 if (!reader_is_quote(reader))
1355 return WC_E_QUOTE;
1356 /* skip "'"|'"' */
1357 reader_skipn(reader, 1);
1359 if (reader_cmp(reader, L"yes") && reader_cmp(reader, L"no"))
1360 return WC_E_XMLDECL;
1362 start = reader_get_cur(reader);
1363 /* skip 'yes'|'no' */
1364 reader_skipn(reader, reader_cmp(reader, L"yes") ? 2 : 3);
1365 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1366 TRACE("standalone=%s\n", debug_strval(reader, &val));
1368 if (!reader_is_quote(reader))
1369 return WC_E_QUOTE;
1370 /* skip "'"|'"' */
1371 reader_skipn(reader, 1);
1373 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1376 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1377 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1379 struct reader_position position;
1380 HRESULT hr;
1382 if (reader_cmp(reader, L"<?xml "))
1383 return S_FALSE;
1385 reader_skipn(reader, 2);
1386 position = reader->position;
1387 reader_skipn(reader, 3);
1388 hr = reader_parse_versioninfo(reader);
1389 if (FAILED(hr))
1390 return hr;
1392 hr = reader_parse_encdecl(reader);
1393 if (FAILED(hr))
1394 return hr;
1396 hr = reader_parse_sddecl(reader);
1397 if (FAILED(hr))
1398 return hr;
1400 reader_skipspaces(reader);
1401 if (reader_cmp(reader, L"?>"))
1402 return WC_E_XMLDECL;
1404 /* skip '?>' */
1405 reader_skipn(reader, 2);
1407 reader->nodetype = XmlNodeType_XmlDeclaration;
1408 reader->empty_element.position = position;
1409 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1410 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1412 return S_OK;
1415 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1416 static HRESULT reader_parse_comment(xmlreader *reader)
1418 WCHAR *ptr;
1419 UINT start;
1421 if (reader->resumestate == XmlReadResumeState_Comment)
1423 start = reader->resume[XmlReadResume_Body];
1424 ptr = reader_get_ptr(reader);
1426 else
1428 /* skip '<!--' */
1429 reader_skipn(reader, 4);
1430 reader_shrink(reader);
1431 ptr = reader_get_ptr(reader);
1432 start = reader_get_cur(reader);
1433 reader->nodetype = XmlNodeType_Comment;
1434 reader->resume[XmlReadResume_Body] = start;
1435 reader->resumestate = XmlReadResumeState_Comment;
1436 reader_set_strvalue(reader, StringValue_Value, NULL);
1439 /* will exit when there's no more data, it won't attempt to
1440 read more from stream */
1441 while (*ptr)
1443 if (ptr[0] == '-')
1445 if (ptr[1] == '-')
1447 if (ptr[2] == '>')
1449 strval value;
1451 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1452 TRACE("%s\n", debug_strval(reader, &value));
1454 /* skip rest of markup '->' */
1455 reader_skipn(reader, 3);
1457 reader_set_strvalue(reader, StringValue_Value, &value);
1458 reader->resume[XmlReadResume_Body] = 0;
1459 reader->resumestate = XmlReadResumeState_Initial;
1460 return S_OK;
1462 else
1463 return WC_E_COMMENT;
1467 reader_skipn(reader, 1);
1468 ptr++;
1471 return S_OK;
1474 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1475 static inline BOOL is_char(WCHAR ch)
1477 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1478 (ch >= 0x20 && ch <= 0xd7ff) ||
1479 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1480 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1481 (ch >= 0xe000 && ch <= 0xfffd);
1484 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1485 BOOL is_pubchar(WCHAR ch)
1487 return (ch == ' ') ||
1488 (ch >= 'a' && ch <= 'z') ||
1489 (ch >= 'A' && ch <= 'Z') ||
1490 (ch >= '0' && ch <= '9') ||
1491 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1492 (ch == '=') || (ch == '?') ||
1493 (ch == '@') || (ch == '!') ||
1494 (ch >= '#' && ch <= '%') || /* #$% */
1495 (ch == '_') || (ch == '\r') || (ch == '\n');
1498 BOOL is_namestartchar(WCHAR ch)
1500 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1501 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1502 (ch >= 0xc0 && ch <= 0xd6) ||
1503 (ch >= 0xd8 && ch <= 0xf6) ||
1504 (ch >= 0xf8 && ch <= 0x2ff) ||
1505 (ch >= 0x370 && ch <= 0x37d) ||
1506 (ch >= 0x37f && ch <= 0x1fff) ||
1507 (ch >= 0x200c && ch <= 0x200d) ||
1508 (ch >= 0x2070 && ch <= 0x218f) ||
1509 (ch >= 0x2c00 && ch <= 0x2fef) ||
1510 (ch >= 0x3001 && ch <= 0xd7ff) ||
1511 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1512 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1513 (ch >= 0xf900 && ch <= 0xfdcf) ||
1514 (ch >= 0xfdf0 && ch <= 0xfffd);
1517 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1518 BOOL is_ncnamechar(WCHAR ch)
1520 return (ch >= 'A' && ch <= 'Z') ||
1521 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1522 (ch == '-') || (ch == '.') ||
1523 (ch >= '0' && ch <= '9') ||
1524 (ch == 0xb7) ||
1525 (ch >= 0xc0 && ch <= 0xd6) ||
1526 (ch >= 0xd8 && ch <= 0xf6) ||
1527 (ch >= 0xf8 && ch <= 0x2ff) ||
1528 (ch >= 0x300 && ch <= 0x36f) ||
1529 (ch >= 0x370 && ch <= 0x37d) ||
1530 (ch >= 0x37f && ch <= 0x1fff) ||
1531 (ch >= 0x200c && ch <= 0x200d) ||
1532 (ch >= 0x203f && ch <= 0x2040) ||
1533 (ch >= 0x2070 && ch <= 0x218f) ||
1534 (ch >= 0x2c00 && ch <= 0x2fef) ||
1535 (ch >= 0x3001 && ch <= 0xd7ff) ||
1536 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1537 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1538 (ch >= 0xf900 && ch <= 0xfdcf) ||
1539 (ch >= 0xfdf0 && ch <= 0xfffd);
1542 BOOL is_namechar(WCHAR ch)
1544 return (ch == ':') || is_ncnamechar(ch);
1547 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1549 /* When we're on attribute always return attribute type, container node type is kept.
1550 Note that container is not necessarily an element, and attribute doesn't mean it's
1551 an attribute in XML spec terms. */
1552 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1555 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1556 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1557 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1558 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1559 [5] Name ::= NameStartChar (NameChar)* */
1560 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1562 WCHAR *ptr;
1563 UINT start;
1565 if (reader->resume[XmlReadResume_Name])
1567 start = reader->resume[XmlReadResume_Name];
1568 ptr = reader_get_ptr(reader);
1570 else
1572 ptr = reader_get_ptr(reader);
1573 start = reader_get_cur(reader);
1574 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1577 while (is_namechar(*ptr))
1579 reader_skipn(reader, 1);
1580 ptr = reader_get_ptr(reader);
1583 if (is_reader_pending(reader))
1585 reader->resume[XmlReadResume_Name] = start;
1586 return E_PENDING;
1588 else
1589 reader->resume[XmlReadResume_Name] = 0;
1591 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1592 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1594 return S_OK;
1597 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1598 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1600 static const strval xmlval = { (WCHAR *)L"xml", 3 };
1601 strval name;
1602 WCHAR *ptr;
1603 HRESULT hr;
1604 UINT i;
1606 hr = reader_parse_name(reader, &name);
1607 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1609 /* now that we got name check for illegal content */
1610 if (strval_eq(reader, &name, &xmlval))
1611 return WC_E_LEADINGXML;
1613 /* PITarget can't be a qualified name */
1614 ptr = reader_get_strptr(reader, &name);
1615 for (i = 0; i < name.len; i++)
1616 if (ptr[i] == ':')
1617 return i ? NC_E_NAMECOLON : WC_E_PI;
1619 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1620 *target = name;
1621 return S_OK;
1624 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1625 static HRESULT reader_parse_pi(xmlreader *reader)
1627 strval target;
1628 WCHAR *ptr;
1629 UINT start;
1630 HRESULT hr;
1632 switch (reader->resumestate)
1634 case XmlReadResumeState_Initial:
1635 /* skip '<?' */
1636 reader_skipn(reader, 2);
1637 reader_shrink(reader);
1638 reader->resumestate = XmlReadResumeState_PITarget;
1639 case XmlReadResumeState_PITarget:
1640 hr = reader_parse_pitarget(reader, &target);
1641 if (FAILED(hr)) return hr;
1642 reader_set_strvalue(reader, StringValue_LocalName, &target);
1643 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1644 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1645 reader->resumestate = XmlReadResumeState_PIBody;
1646 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1647 default:
1651 start = reader->resume[XmlReadResume_Body];
1652 ptr = reader_get_ptr(reader);
1653 while (*ptr)
1655 if (ptr[0] == '?')
1657 if (ptr[1] == '>')
1659 UINT cur = reader_get_cur(reader);
1660 strval value;
1662 /* strip all leading whitespace chars */
1663 while (start < cur)
1665 ptr = reader_get_ptr2(reader, start);
1666 if (!is_wchar_space(*ptr)) break;
1667 start++;
1670 reader_init_strvalue(start, cur-start, &value);
1672 /* skip '?>' */
1673 reader_skipn(reader, 2);
1674 TRACE("%s\n", debug_strval(reader, &value));
1675 reader->nodetype = XmlNodeType_ProcessingInstruction;
1676 reader->resumestate = XmlReadResumeState_Initial;
1677 reader->resume[XmlReadResume_Body] = 0;
1678 reader_set_strvalue(reader, StringValue_Value, &value);
1679 return S_OK;
1683 reader_skipn(reader, 1);
1684 ptr = reader_get_ptr(reader);
1687 return S_OK;
1690 /* This one is used to parse significant whitespace nodes, like in Misc production */
1691 static HRESULT reader_parse_whitespace(xmlreader *reader)
1693 switch (reader->resumestate)
1695 case XmlReadResumeState_Initial:
1696 reader_shrink(reader);
1697 reader->resumestate = XmlReadResumeState_Whitespace;
1698 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1699 reader->nodetype = XmlNodeType_Whitespace;
1700 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1701 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1702 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1703 /* fallthrough */
1704 case XmlReadResumeState_Whitespace:
1706 strval value;
1707 UINT start;
1709 reader_skipspaces(reader);
1710 if (is_reader_pending(reader)) return S_OK;
1712 start = reader->resume[XmlReadResume_Body];
1713 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1714 reader_set_strvalue(reader, StringValue_Value, &value);
1715 TRACE("%s\n", debug_strval(reader, &value));
1716 reader->resumestate = XmlReadResumeState_Initial;
1718 default:
1722 return S_OK;
1725 /* [27] Misc ::= Comment | PI | S */
1726 static HRESULT reader_parse_misc(xmlreader *reader)
1728 HRESULT hr = S_FALSE;
1730 if (reader->resumestate != XmlReadResumeState_Initial)
1732 hr = reader_more(reader);
1733 if (FAILED(hr)) return hr;
1735 /* finish current node */
1736 switch (reader->resumestate)
1738 case XmlReadResumeState_PITarget:
1739 case XmlReadResumeState_PIBody:
1740 return reader_parse_pi(reader);
1741 case XmlReadResumeState_Comment:
1742 return reader_parse_comment(reader);
1743 case XmlReadResumeState_Whitespace:
1744 return reader_parse_whitespace(reader);
1745 default:
1746 ERR("unknown resume state %d\n", reader->resumestate);
1750 while (1)
1752 const WCHAR *cur = reader_get_ptr(reader);
1754 if (is_wchar_space(*cur))
1755 hr = reader_parse_whitespace(reader);
1756 else if (!reader_cmp(reader, L"<!--"))
1757 hr = reader_parse_comment(reader);
1758 else if (!reader_cmp(reader, L"<?"))
1759 hr = reader_parse_pi(reader);
1760 else
1761 break;
1763 if (hr != S_FALSE) return hr;
1766 return hr;
1769 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1770 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1772 WCHAR *cur = reader_get_ptr(reader), quote;
1773 UINT start;
1775 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1777 quote = *cur;
1778 reader_skipn(reader, 1);
1780 cur = reader_get_ptr(reader);
1781 start = reader_get_cur(reader);
1782 while (is_char(*cur) && *cur != quote)
1784 reader_skipn(reader, 1);
1785 cur = reader_get_ptr(reader);
1787 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1788 if (*cur == quote) reader_skipn(reader, 1);
1790 TRACE("%s\n", debug_strval(reader, literal));
1791 return S_OK;
1794 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1795 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1796 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1798 WCHAR *cur = reader_get_ptr(reader), quote;
1799 UINT start;
1801 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1803 quote = *cur;
1804 reader_skipn(reader, 1);
1806 start = reader_get_cur(reader);
1807 cur = reader_get_ptr(reader);
1808 while (is_pubchar(*cur) && *cur != quote)
1810 reader_skipn(reader, 1);
1811 cur = reader_get_ptr(reader);
1813 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1814 if (*cur == quote) reader_skipn(reader, 1);
1816 TRACE("%s\n", debug_strval(reader, literal));
1817 return S_OK;
1820 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1821 static HRESULT reader_parse_externalid(xmlreader *reader)
1823 static WCHAR systemW[] = L"SYSTEM";
1824 static WCHAR publicW[] = L"PUBLIC";
1825 struct reader_position position = reader->position;
1826 strval name, sys;
1827 HRESULT hr;
1828 int cnt;
1830 if (!reader_cmp(reader, publicW)) {
1831 strval pub;
1833 /* public id */
1834 reader_skipn(reader, 6);
1835 cnt = reader_skipspaces(reader);
1836 if (!cnt) return WC_E_WHITESPACE;
1838 hr = reader_parse_pub_literal(reader, &pub);
1839 if (FAILED(hr)) return hr;
1841 reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
1842 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1843 if (FAILED(hr)) return hr;
1845 cnt = reader_skipspaces(reader);
1846 if (!cnt) return S_OK;
1848 /* optional system id */
1849 hr = reader_parse_sys_literal(reader, &sys);
1850 if (FAILED(hr)) return S_OK;
1852 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1853 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1854 if (FAILED(hr)) return hr;
1856 return S_OK;
1857 } else if (!reader_cmp(reader, systemW)) {
1858 /* system id */
1859 reader_skipn(reader, 6);
1860 cnt = reader_skipspaces(reader);
1861 if (!cnt) return WC_E_WHITESPACE;
1863 hr = reader_parse_sys_literal(reader, &sys);
1864 if (FAILED(hr)) return hr;
1866 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1867 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1870 return S_FALSE;
1873 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1874 static HRESULT reader_parse_dtd(xmlreader *reader)
1876 strval name;
1877 WCHAR *cur;
1878 HRESULT hr;
1880 if (reader_cmp(reader, L"<!DOCTYPE")) return S_FALSE;
1881 reader_shrink(reader);
1883 /* DTD processing is not allowed by default */
1884 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1886 reader_skipn(reader, 9);
1887 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1889 /* name */
1890 hr = reader_parse_name(reader, &name);
1891 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1893 reader_skipspaces(reader);
1895 hr = reader_parse_externalid(reader);
1896 if (FAILED(hr)) return hr;
1898 reader_skipspaces(reader);
1900 cur = reader_get_ptr(reader);
1901 if (*cur != '>')
1903 FIXME("internal subset parsing not implemented\n");
1904 return E_NOTIMPL;
1907 /* skip '>' */
1908 reader_skipn(reader, 1);
1910 reader->nodetype = XmlNodeType_DocumentType;
1911 reader_set_strvalue(reader, StringValue_LocalName, &name);
1912 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1914 return S_OK;
1917 /* [11 NS] LocalPart ::= NCName */
1918 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1920 WCHAR *ptr;
1921 UINT start;
1923 if (reader->resume[XmlReadResume_Local])
1925 start = reader->resume[XmlReadResume_Local];
1926 ptr = reader_get_ptr(reader);
1928 else
1930 ptr = reader_get_ptr(reader);
1931 start = reader_get_cur(reader);
1934 while (is_ncnamechar(*ptr))
1936 reader_skipn(reader, 1);
1937 ptr = reader_get_ptr(reader);
1940 if (check_for_separator && *ptr == ':')
1941 return NC_E_QNAMECOLON;
1943 if (is_reader_pending(reader))
1945 reader->resume[XmlReadResume_Local] = start;
1946 return E_PENDING;
1948 else
1949 reader->resume[XmlReadResume_Local] = 0;
1951 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1953 return S_OK;
1956 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1957 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1958 [9 NS] UnprefixedName ::= LocalPart
1959 [10 NS] Prefix ::= NCName */
1960 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1962 WCHAR *ptr;
1963 UINT start;
1964 HRESULT hr;
1966 if (reader->resume[XmlReadResume_Name])
1968 start = reader->resume[XmlReadResume_Name];
1969 ptr = reader_get_ptr(reader);
1971 else
1973 ptr = reader_get_ptr(reader);
1974 start = reader_get_cur(reader);
1975 reader->resume[XmlReadResume_Name] = start;
1976 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1979 if (reader->resume[XmlReadResume_Local])
1981 hr = reader_parse_local(reader, local, FALSE);
1982 if (FAILED(hr)) return hr;
1984 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1985 local->start - reader->resume[XmlReadResume_Name] - 1,
1986 prefix);
1988 else
1990 /* skip prefix part */
1991 while (is_ncnamechar(*ptr))
1993 reader_skipn(reader, 1);
1994 ptr = reader_get_ptr(reader);
1997 if (is_reader_pending(reader)) return E_PENDING;
1999 /* got a qualified name */
2000 if (*ptr == ':')
2002 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
2004 /* skip ':' */
2005 reader_skipn(reader, 1);
2006 hr = reader_parse_local(reader, local, TRUE);
2007 if (FAILED(hr)) return hr;
2009 else
2011 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2012 reader_init_strvalue(0, 0, prefix);
2016 if (prefix->len)
2017 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2018 else
2019 TRACE("ncname %s\n", debug_strval(reader, local));
2021 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2022 /* count ':' too */
2023 (prefix->len ? prefix->len + 1 : 0) + local->len,
2024 qname);
2026 reader->resume[XmlReadResume_Name] = 0;
2027 reader->resume[XmlReadResume_Local] = 0;
2029 return S_OK;
2032 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2034 static const strval lt = { (WCHAR *)L"lt", 2 };
2035 static const strval gt = { (WCHAR *)L"gt", 2 };
2036 static const strval amp = { (WCHAR *)L"amp", 3 };
2037 static const strval apos = { (WCHAR *)L"apos", 4 };
2038 static const strval quot = { (WCHAR *)L"quot", 4 };
2039 WCHAR *str = reader_get_strptr(reader, name);
2041 switch (*str)
2043 case 'l':
2044 if (strval_eq(reader, name, &lt)) return '<';
2045 break;
2046 case 'g':
2047 if (strval_eq(reader, name, &gt)) return '>';
2048 break;
2049 case 'a':
2050 if (strval_eq(reader, name, &amp))
2051 return '&';
2052 else if (strval_eq(reader, name, &apos))
2053 return '\'';
2054 break;
2055 case 'q':
2056 if (strval_eq(reader, name, &quot)) return '\"';
2057 break;
2058 default:
2062 return 0;
2065 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2066 [67] Reference ::= EntityRef | CharRef
2067 [68] EntityRef ::= '&' Name ';' */
2068 static HRESULT reader_parse_reference(xmlreader *reader)
2070 encoded_buffer *buffer = &reader->input->buffer->utf16;
2071 WCHAR *start = reader_get_ptr(reader), *ptr;
2072 UINT cur = reader_get_cur(reader);
2073 WCHAR ch = 0;
2074 int len;
2076 /* skip '&' */
2077 reader_skipn(reader, 1);
2078 ptr = reader_get_ptr(reader);
2080 if (*ptr == '#')
2082 reader_skipn(reader, 1);
2083 ptr = reader_get_ptr(reader);
2085 /* hex char or decimal */
2086 if (*ptr == 'x')
2088 reader_skipn(reader, 1);
2089 ptr = reader_get_ptr(reader);
2091 while (*ptr != ';')
2093 if ((*ptr >= '0' && *ptr <= '9'))
2094 ch = ch*16 + *ptr - '0';
2095 else if ((*ptr >= 'a' && *ptr <= 'f'))
2096 ch = ch*16 + *ptr - 'a' + 10;
2097 else if ((*ptr >= 'A' && *ptr <= 'F'))
2098 ch = ch*16 + *ptr - 'A' + 10;
2099 else
2100 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2101 reader_skipn(reader, 1);
2102 ptr = reader_get_ptr(reader);
2105 else
2107 while (*ptr != ';')
2109 if ((*ptr >= '0' && *ptr <= '9'))
2111 ch = ch*10 + *ptr - '0';
2112 reader_skipn(reader, 1);
2113 ptr = reader_get_ptr(reader);
2115 else
2116 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2120 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2122 /* normalize */
2123 if (is_wchar_space(ch)) ch = ' ';
2125 ptr = reader_get_ptr(reader);
2126 start = reader_get_ptr2(reader, cur);
2127 len = buffer->written - ((char *)ptr - buffer->data);
2128 memmove(start + 1, ptr + 1, len);
2130 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2131 *(WCHAR*)(buffer->data + buffer->written) = 0;
2132 buffer->cur = cur + 1;
2134 *start = ch;
2136 else
2138 strval name;
2139 HRESULT hr;
2141 hr = reader_parse_name(reader, &name);
2142 if (FAILED(hr)) return hr;
2144 ptr = reader_get_ptr(reader);
2145 if (*ptr != ';') return WC_E_SEMICOLON;
2147 /* predefined entities resolve to a single character */
2148 ch = get_predefined_entity(reader, &name);
2149 if (ch)
2151 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2152 memmove(start+1, ptr+1, len);
2153 buffer->cur = cur + 1;
2154 buffer->written -= (ptr - start) * sizeof(WCHAR);
2155 *(WCHAR*)(buffer->data + buffer->written) = 0;
2157 *start = ch;
2159 else
2161 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2162 return WC_E_UNDECLAREDENTITY;
2167 return S_OK;
2170 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2171 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2173 WCHAR *ptr, quote;
2174 UINT start;
2176 ptr = reader_get_ptr(reader);
2178 /* skip opening quote */
2179 quote = *ptr;
2180 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2181 reader_skipn(reader, 1);
2183 ptr = reader_get_ptr(reader);
2184 start = reader_get_cur(reader);
2185 while (*ptr)
2187 if (*ptr == '<') return WC_E_LESSTHAN;
2189 if (*ptr == quote)
2191 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2192 /* skip closing quote */
2193 reader_skipn(reader, 1);
2194 return S_OK;
2197 if (*ptr == '&')
2199 HRESULT hr = reader_parse_reference(reader);
2200 if (FAILED(hr)) return hr;
2202 else
2204 /* replace all whitespace chars with ' ' */
2205 if (is_wchar_space(*ptr)) *ptr = ' ';
2206 reader_skipn(reader, 1);
2208 ptr = reader_get_ptr(reader);
2211 return WC_E_QUOTE;
2214 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2215 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2216 [3 NS] DefaultAttName ::= 'xmlns'
2217 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2218 static HRESULT reader_parse_attribute(xmlreader *reader)
2220 struct reader_position position = reader->position;
2221 strval prefix, local, qname, value;
2222 enum attribute_flags flags = 0;
2223 HRESULT hr;
2225 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2226 if (FAILED(hr)) return hr;
2228 if (strval_eq(reader, &prefix, &strval_xmlns))
2229 flags |= ATTRIBUTE_NS_DEFINITION;
2231 if (strval_eq(reader, &qname, &strval_xmlns))
2232 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2234 hr = reader_parse_eq(reader);
2235 if (FAILED(hr)) return hr;
2237 hr = reader_parse_attvalue(reader, &value);
2238 if (FAILED(hr)) return hr;
2240 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2241 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2243 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2244 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2247 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2248 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2249 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2251 struct reader_position position = reader->position;
2252 HRESULT hr;
2254 hr = reader_parse_qname(reader, prefix, local, qname);
2255 if (FAILED(hr)) return hr;
2257 for (;;)
2259 reader_skipspaces(reader);
2261 /* empty element */
2262 if ((reader->is_empty_element = !reader_cmp(reader, L"/>")))
2264 struct element *element = &reader->empty_element;
2266 /* skip '/>' */
2267 reader_skipn(reader, 2);
2269 reader_free_strvalued(reader, &element->qname);
2270 reader_free_strvalued(reader, &element->localname);
2272 element->prefix = *prefix;
2273 reader_strvaldup(reader, qname, &element->qname);
2274 reader_strvaldup(reader, local, &element->localname);
2275 element->position = position;
2276 reader_mark_ns_nodes(reader, element);
2277 return S_OK;
2280 /* got a start tag */
2281 if (!reader_cmp(reader, L">"))
2283 /* skip '>' */
2284 reader_skipn(reader, 1);
2285 return reader_push_element(reader, prefix, local, qname, &position);
2288 hr = reader_parse_attribute(reader);
2289 if (FAILED(hr)) return hr;
2292 return S_OK;
2295 /* [39] element ::= EmptyElemTag | STag content ETag */
2296 static HRESULT reader_parse_element(xmlreader *reader)
2298 HRESULT hr;
2300 switch (reader->resumestate)
2302 case XmlReadResumeState_Initial:
2303 /* check if we are really on element */
2304 if (reader_cmp(reader, L"<")) return S_FALSE;
2306 /* skip '<' */
2307 reader_skipn(reader, 1);
2309 reader_shrink(reader);
2310 reader->resumestate = XmlReadResumeState_STag;
2311 case XmlReadResumeState_STag:
2313 strval qname, prefix, local;
2315 /* this handles empty elements too */
2316 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2317 if (FAILED(hr)) return hr;
2319 /* FIXME: need to check for defined namespace to reject invalid prefix */
2321 /* if we got empty element and stack is empty go straight to Misc */
2322 if (reader->is_empty_element && list_empty(&reader->elements))
2323 reader->instate = XmlReadInState_MiscEnd;
2324 else
2325 reader->instate = XmlReadInState_Content;
2327 reader->nodetype = XmlNodeType_Element;
2328 reader->resumestate = XmlReadResumeState_Initial;
2329 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2330 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2331 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2332 break;
2334 default:
2335 hr = E_FAIL;
2338 return hr;
2341 /* [13 NS] ETag ::= '</' QName S? '>' */
2342 static HRESULT reader_parse_endtag(xmlreader *reader)
2344 struct reader_position position;
2345 strval prefix, local, qname;
2346 struct element *element;
2347 HRESULT hr;
2349 /* skip '</' */
2350 reader_skipn(reader, 2);
2352 position = reader->position;
2353 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2354 if (FAILED(hr)) return hr;
2356 reader_skipspaces(reader);
2358 if (reader_cmp(reader, L">")) return WC_E_GREATERTHAN;
2360 /* skip '>' */
2361 reader_skipn(reader, 1);
2363 /* Element stack should never be empty at this point, cause we shouldn't get to
2364 content parsing if it's empty. */
2365 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2366 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2368 /* update position stored for start tag, we won't be using it */
2369 element->position = position;
2371 reader->nodetype = XmlNodeType_EndElement;
2372 reader->is_empty_element = FALSE;
2373 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2375 return S_OK;
2378 /* [18] CDSect ::= CDStart CData CDEnd
2379 [19] CDStart ::= '<![CDATA['
2380 [20] CData ::= (Char* - (Char* ']]>' Char*))
2381 [21] CDEnd ::= ']]>' */
2382 static HRESULT reader_parse_cdata(xmlreader *reader)
2384 WCHAR *ptr;
2385 UINT start;
2387 if (reader->resumestate == XmlReadResumeState_CDATA)
2389 start = reader->resume[XmlReadResume_Body];
2390 ptr = reader_get_ptr(reader);
2392 else
2394 /* skip markup '<![CDATA[' */
2395 reader_skipn(reader, 9);
2396 reader_shrink(reader);
2397 ptr = reader_get_ptr(reader);
2398 start = reader_get_cur(reader);
2399 reader->nodetype = XmlNodeType_CDATA;
2400 reader->resume[XmlReadResume_Body] = start;
2401 reader->resumestate = XmlReadResumeState_CDATA;
2402 reader_set_strvalue(reader, StringValue_Value, NULL);
2405 while (*ptr)
2407 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2409 strval value;
2411 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2413 /* skip ']]>' */
2414 reader_skipn(reader, 3);
2415 TRACE("%s\n", debug_strval(reader, &value));
2417 reader_set_strvalue(reader, StringValue_Value, &value);
2418 reader->resume[XmlReadResume_Body] = 0;
2419 reader->resumestate = XmlReadResumeState_Initial;
2420 return S_OK;
2422 else
2424 reader_skipn(reader, 1);
2425 ptr = reader_get_ptr(reader);
2429 return S_OK;
2432 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2433 static HRESULT reader_parse_chardata(xmlreader *reader)
2435 struct reader_position position;
2436 WCHAR *ptr;
2437 UINT start;
2439 if (reader->resumestate == XmlReadResumeState_CharData)
2441 start = reader->resume[XmlReadResume_Body];
2442 ptr = reader_get_ptr(reader);
2444 else
2446 reader_shrink(reader);
2447 ptr = reader_get_ptr(reader);
2448 start = reader_get_cur(reader);
2449 /* There's no text */
2450 if (!*ptr || *ptr == '<') return S_OK;
2451 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2452 reader->resume[XmlReadResume_Body] = start;
2453 reader->resumestate = XmlReadResumeState_CharData;
2454 reader_set_strvalue(reader, StringValue_Value, NULL);
2457 position = reader->position;
2458 while (*ptr)
2460 /* CDATA closing sequence ']]>' is not allowed */
2461 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2462 return WC_E_CDSECTEND;
2464 /* Found next markup part */
2465 if (ptr[0] == '<')
2467 strval value;
2469 reader->empty_element.position = position;
2470 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2471 reader_set_strvalue(reader, StringValue_Value, &value);
2472 reader->resume[XmlReadResume_Body] = 0;
2473 reader->resumestate = XmlReadResumeState_Initial;
2474 return S_OK;
2477 /* this covers a case when text has leading whitespace chars */
2478 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2480 if (!reader_cmp(reader, L"&"))
2481 reader_parse_reference(reader);
2482 else
2483 reader_skipn(reader, 1);
2485 ptr = reader_get_ptr(reader);
2488 return S_OK;
2491 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2492 static HRESULT reader_parse_content(xmlreader *reader)
2494 if (reader->resumestate != XmlReadResumeState_Initial)
2496 switch (reader->resumestate)
2498 case XmlReadResumeState_CDATA:
2499 return reader_parse_cdata(reader);
2500 case XmlReadResumeState_Comment:
2501 return reader_parse_comment(reader);
2502 case XmlReadResumeState_PIBody:
2503 case XmlReadResumeState_PITarget:
2504 return reader_parse_pi(reader);
2505 case XmlReadResumeState_CharData:
2506 return reader_parse_chardata(reader);
2507 default:
2508 ERR("unknown resume state %d\n", reader->resumestate);
2512 reader_shrink(reader);
2514 /* handle end tag here, it indicates end of content as well */
2515 if (!reader_cmp(reader, L"</"))
2516 return reader_parse_endtag(reader);
2518 if (!reader_cmp(reader, L"<!--"))
2519 return reader_parse_comment(reader);
2521 if (!reader_cmp(reader, L"<?"))
2522 return reader_parse_pi(reader);
2524 if (!reader_cmp(reader, L"<![CDATA["))
2525 return reader_parse_cdata(reader);
2527 if (!reader_cmp(reader, L"<"))
2528 return reader_parse_element(reader);
2530 /* what's left must be CharData */
2531 return reader_parse_chardata(reader);
2534 static HRESULT reader_parse_nextnode(xmlreader *reader)
2536 XmlNodeType nodetype = reader_get_nodetype(reader);
2537 HRESULT hr;
2539 if (!is_reader_pending(reader))
2541 reader->chunk_read_off = 0;
2542 reader_clear_attrs(reader);
2545 /* When moving from EndElement or empty element, pop its own namespace definitions */
2546 switch (nodetype)
2548 case XmlNodeType_Attribute:
2549 reader_dec_depth(reader);
2550 /* fallthrough */
2551 case XmlNodeType_Element:
2552 if (reader->is_empty_element)
2553 reader_pop_ns_nodes(reader, &reader->empty_element);
2554 else if (FAILED(hr = reader_inc_depth(reader)))
2555 return hr;
2556 break;
2557 case XmlNodeType_EndElement:
2558 reader_pop_element(reader);
2559 reader_dec_depth(reader);
2560 break;
2561 default:
2565 for (;;)
2567 switch (reader->instate)
2569 /* if it's a first call for a new input we need to detect stream encoding */
2570 case XmlReadInState_Initial:
2572 xml_encoding enc;
2574 hr = readerinput_growraw(reader->input);
2575 if (FAILED(hr)) return hr;
2577 reader->position.line_number = 1;
2578 reader->position.line_position = 1;
2580 /* try to detect encoding by BOM or data and set input code page */
2581 hr = readerinput_detectencoding(reader->input, &enc);
2582 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2583 debugstr_w(xml_encoding_map[enc].name), hr);
2584 if (FAILED(hr)) return hr;
2586 /* always switch first time cause we have to put something in */
2587 readerinput_switchencoding(reader->input, enc);
2589 /* parse xml declaration */
2590 hr = reader_parse_xmldecl(reader);
2591 if (FAILED(hr)) return hr;
2593 readerinput_shrinkraw(reader->input, -1);
2594 reader->instate = XmlReadInState_Misc_DTD;
2595 if (hr == S_OK) return hr;
2597 break;
2598 case XmlReadInState_Misc_DTD:
2599 hr = reader_parse_misc(reader);
2600 if (FAILED(hr)) return hr;
2602 if (hr == S_FALSE)
2603 reader->instate = XmlReadInState_DTD;
2604 else
2605 return hr;
2606 break;
2607 case XmlReadInState_DTD:
2608 hr = reader_parse_dtd(reader);
2609 if (FAILED(hr)) return hr;
2611 if (hr == S_OK)
2613 reader->instate = XmlReadInState_DTD_Misc;
2614 return hr;
2616 else
2617 reader->instate = XmlReadInState_Element;
2618 break;
2619 case XmlReadInState_DTD_Misc:
2620 hr = reader_parse_misc(reader);
2621 if (FAILED(hr)) return hr;
2623 if (hr == S_FALSE)
2624 reader->instate = XmlReadInState_Element;
2625 else
2626 return hr;
2627 break;
2628 case XmlReadInState_Element:
2629 return reader_parse_element(reader);
2630 case XmlReadInState_Content:
2631 return reader_parse_content(reader);
2632 case XmlReadInState_MiscEnd:
2633 hr = reader_parse_misc(reader);
2634 if (hr != S_FALSE) return hr;
2636 if (*reader_get_ptr(reader))
2638 WARN("found garbage in the end of XML\n");
2639 return WC_E_SYNTAX;
2642 reader->instate = XmlReadInState_Eof;
2643 reader->state = XmlReadState_EndOfFile;
2644 reader->nodetype = XmlNodeType_None;
2645 return hr;
2646 case XmlReadInState_Eof:
2647 return S_FALSE;
2648 default:
2649 FIXME("internal state %d not handled\n", reader->instate);
2650 return E_NOTIMPL;
2654 return E_NOTIMPL;
2657 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2659 xmlreader *This = impl_from_IXmlReader(iface);
2661 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2663 if (IsEqualGUID(riid, &IID_IUnknown) ||
2664 IsEqualGUID(riid, &IID_IXmlReader))
2666 *ppvObject = iface;
2668 else
2670 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2671 *ppvObject = NULL;
2672 return E_NOINTERFACE;
2675 IXmlReader_AddRef(iface);
2677 return S_OK;
2680 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2682 xmlreader *This = impl_from_IXmlReader(iface);
2683 ULONG ref = InterlockedIncrement(&This->ref);
2684 TRACE("(%p)->(%d)\n", This, ref);
2685 return ref;
2688 static void reader_clear_ns(xmlreader *reader)
2690 struct ns *ns, *ns2;
2692 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2693 list_remove(&ns->entry);
2694 reader_free_strvalued(reader, &ns->prefix);
2695 reader_free_strvalued(reader, &ns->uri);
2696 reader_free(reader, ns);
2699 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2700 list_remove(&ns->entry);
2701 reader_free_strvalued(reader, &ns->uri);
2702 reader_free(reader, ns);
2706 static void reader_reset_parser(xmlreader *reader)
2708 reader->position.line_number = 0;
2709 reader->position.line_position = 0;
2711 reader_clear_elements(reader);
2712 reader_clear_attrs(reader);
2713 reader_clear_ns(reader);
2714 reader_free_strvalues(reader);
2716 reader->depth = 0;
2717 reader->nodetype = XmlNodeType_None;
2718 reader->resumestate = XmlReadResumeState_Initial;
2719 memset(reader->resume, 0, sizeof(reader->resume));
2720 reader->is_empty_element = FALSE;
2723 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2725 xmlreader *This = impl_from_IXmlReader(iface);
2726 LONG ref = InterlockedDecrement(&This->ref);
2728 TRACE("(%p)->(%d)\n", This, ref);
2730 if (ref == 0)
2732 IMalloc *imalloc = This->imalloc;
2733 reader_reset_parser(This);
2734 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2735 if (This->resolver) IXmlResolver_Release(This->resolver);
2736 if (This->mlang) IUnknown_Release(This->mlang);
2737 reader_free(This, This);
2738 if (imalloc) IMalloc_Release(imalloc);
2741 return ref;
2744 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2746 xmlreader *This = impl_from_IXmlReader(iface);
2747 IXmlReaderInput *readerinput;
2748 HRESULT hr;
2750 TRACE("(%p)->(%p)\n", This, input);
2752 if (This->input)
2754 readerinput_release_stream(This->input);
2755 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2756 This->input = NULL;
2759 reader_reset_parser(This);
2761 /* just reset current input */
2762 if (!input)
2764 This->state = XmlReadState_Initial;
2765 return S_OK;
2768 /* now try IXmlReaderInput, ISequentialStream, IStream */
2769 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2770 if (hr == S_OK)
2772 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2773 This->input = impl_from_IXmlReaderInput(readerinput);
2774 else
2776 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2777 readerinput, readerinput->lpVtbl);
2778 IUnknown_Release(readerinput);
2779 return E_FAIL;
2784 if (hr != S_OK || !readerinput)
2786 /* create IXmlReaderInput basing on supplied interface */
2787 hr = CreateXmlReaderInputWithEncodingName(input,
2788 This->imalloc, NULL, FALSE, NULL, &readerinput);
2789 if (hr != S_OK) return hr;
2790 This->input = impl_from_IXmlReaderInput(readerinput);
2793 /* set stream for supplied IXmlReaderInput */
2794 hr = readerinput_query_for_stream(This->input);
2795 if (hr == S_OK)
2797 This->state = XmlReadState_Initial;
2798 This->instate = XmlReadInState_Initial;
2800 return hr;
2803 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2805 xmlreader *This = impl_from_IXmlReader(iface);
2807 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2809 if (!value) return E_INVALIDARG;
2811 switch (property)
2813 case XmlReaderProperty_MultiLanguage:
2814 *value = (LONG_PTR)This->mlang;
2815 if (This->mlang)
2816 IUnknown_AddRef(This->mlang);
2817 break;
2818 case XmlReaderProperty_XmlResolver:
2819 *value = (LONG_PTR)This->resolver;
2820 if (This->resolver)
2821 IXmlResolver_AddRef(This->resolver);
2822 break;
2823 case XmlReaderProperty_DtdProcessing:
2824 *value = This->dtdmode;
2825 break;
2826 case XmlReaderProperty_ReadState:
2827 *value = This->state;
2828 break;
2829 case XmlReaderProperty_MaxElementDepth:
2830 *value = This->max_depth;
2831 break;
2832 default:
2833 FIXME("Unimplemented property (%u)\n", property);
2834 return E_NOTIMPL;
2837 return S_OK;
2840 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2842 xmlreader *This = impl_from_IXmlReader(iface);
2844 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2846 switch (property)
2848 case XmlReaderProperty_MultiLanguage:
2849 if (This->mlang)
2850 IUnknown_Release(This->mlang);
2851 This->mlang = (IUnknown*)value;
2852 if (This->mlang)
2853 IUnknown_AddRef(This->mlang);
2854 if (This->mlang)
2855 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2856 break;
2857 case XmlReaderProperty_XmlResolver:
2858 if (This->resolver)
2859 IXmlResolver_Release(This->resolver);
2860 This->resolver = (IXmlResolver*)value;
2861 if (This->resolver)
2862 IXmlResolver_AddRef(This->resolver);
2863 break;
2864 case XmlReaderProperty_DtdProcessing:
2865 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2866 This->dtdmode = value;
2867 break;
2868 case XmlReaderProperty_MaxElementDepth:
2869 This->max_depth = value;
2870 break;
2871 default:
2872 FIXME("Unimplemented property (%u)\n", property);
2873 return E_NOTIMPL;
2876 return S_OK;
2879 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2881 xmlreader *This = impl_from_IXmlReader(iface);
2882 XmlNodeType oldtype = This->nodetype;
2883 XmlNodeType type;
2884 HRESULT hr;
2886 TRACE("(%p)->(%p)\n", This, nodetype);
2888 if (!nodetype)
2889 nodetype = &type;
2891 switch (This->state)
2893 case XmlReadState_Closed:
2894 hr = S_FALSE;
2895 break;
2896 case XmlReadState_Error:
2897 hr = This->error;
2898 break;
2899 default:
2900 hr = reader_parse_nextnode(This);
2901 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2902 This->state = XmlReadState_Interactive;
2904 if (FAILED(hr))
2906 This->state = XmlReadState_Error;
2907 This->nodetype = XmlNodeType_None;
2908 This->depth = 0;
2909 This->error = hr;
2913 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2914 *nodetype = This->nodetype;
2916 return hr;
2919 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2921 xmlreader *This = impl_from_IXmlReader(iface);
2923 TRACE("(%p)->(%p)\n", This, node_type);
2925 if (!node_type)
2926 return E_INVALIDARG;
2928 *node_type = reader_get_nodetype(This);
2929 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2932 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2934 reader->attr = attr;
2935 reader->chunk_read_off = 0;
2936 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2937 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2938 reader_set_strvalue(reader, StringValue_Value, &attr->value);
2941 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2943 if (!reader->attr_count)
2944 return S_FALSE;
2946 if (!reader->attr)
2947 reader_inc_depth(reader);
2949 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2951 return S_OK;
2954 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2956 xmlreader *This = impl_from_IXmlReader(iface);
2958 TRACE("(%p)\n", This);
2960 return reader_move_to_first_attribute(This);
2963 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2965 xmlreader *This = impl_from_IXmlReader(iface);
2966 const struct list *next;
2968 TRACE("(%p)\n", This);
2970 if (!This->attr_count) return S_FALSE;
2972 if (!This->attr)
2973 return reader_move_to_first_attribute(This);
2975 next = list_next(&This->attrs, &This->attr->entry);
2976 if (next)
2977 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
2979 return next ? S_OK : S_FALSE;
2982 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
2984 static const WCHAR xmlns_uriW[] = L"http://www.w3.org/2000/xmlns/";
2985 static const WCHAR xml_uriW[] = L"http://www.w3.org/XML/1998/namespace";
2987 /* Check for reserved prefixes first */
2988 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
2989 strval_eq(reader, &attr->prefix, &strval_xmlns))
2991 *uri = xmlns_uriW;
2992 *len = ARRAY_SIZE(xmlns_uriW) - 1;
2994 else if (strval_eq(reader, &attr->prefix, &strval_xml))
2996 *uri = xml_uriW;
2997 *len = ARRAY_SIZE(xml_uriW) - 1;
2999 else
3001 *uri = NULL;
3002 *len = 0;
3005 if (!*uri)
3007 struct ns *ns;
3009 if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3011 *uri = ns->uri.str;
3012 *len = ns->uri.len;
3014 else
3016 *uri = emptyW;
3017 *len = 0;
3022 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3024 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3026 *name = xmlnsW;
3027 *len = 5;
3029 else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3031 const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3032 *name = ns->prefix.str;
3033 *len = ns->prefix.len;
3035 else
3037 *name = attr->localname.str;
3038 *len = attr->localname.len;
3042 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3043 const WCHAR *local_name, const WCHAR *namespace_uri)
3045 xmlreader *This = impl_from_IXmlReader(iface);
3046 UINT target_name_len, target_uri_len;
3047 struct attribute *attr;
3049 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3051 if (!local_name)
3052 return E_INVALIDARG;
3054 if (!This->attr_count)
3055 return S_FALSE;
3057 if (!namespace_uri)
3058 namespace_uri = emptyW;
3060 target_name_len = lstrlenW(local_name);
3061 target_uri_len = lstrlenW(namespace_uri);
3063 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3065 UINT name_len, uri_len;
3066 const WCHAR *name, *uri;
3068 reader_get_attribute_local_name(This, attr, &name, &name_len);
3069 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3071 if (name_len == target_name_len && uri_len == target_uri_len &&
3072 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3074 reader_set_current_attribute(This, attr);
3075 return S_OK;
3079 return S_FALSE;
3082 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3084 xmlreader *This = impl_from_IXmlReader(iface);
3086 TRACE("(%p)\n", This);
3088 if (!This->attr_count) return S_FALSE;
3090 if (This->attr)
3091 reader_dec_depth(This);
3093 This->attr = NULL;
3095 /* FIXME: support other node types with 'attributes' like DTD */
3096 if (This->is_empty_element) {
3097 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3098 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3100 else {
3101 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3102 if (element) {
3103 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3104 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3107 This->chunk_read_off = 0;
3108 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3110 return S_OK;
3113 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3115 xmlreader *This = impl_from_IXmlReader(iface);
3116 struct attribute *attribute = This->attr;
3117 struct element *element;
3118 UINT length;
3120 TRACE("(%p)->(%p %p)\n", This, name, len);
3122 if (!len)
3123 len = &length;
3125 switch (reader_get_nodetype(This))
3127 case XmlNodeType_Text:
3128 case XmlNodeType_CDATA:
3129 case XmlNodeType_Comment:
3130 case XmlNodeType_Whitespace:
3131 *name = emptyW;
3132 *len = 0;
3133 break;
3134 case XmlNodeType_Element:
3135 case XmlNodeType_EndElement:
3136 element = reader_get_element(This);
3137 if (element->prefix.len)
3139 *name = element->qname.str;
3140 *len = element->qname.len;
3142 else
3144 *name = element->localname.str;
3145 *len = element->localname.len;
3147 break;
3148 case XmlNodeType_Attribute:
3149 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3151 *name = xmlnsW;
3152 *len = 5;
3153 } else if (attribute->prefix.len)
3155 *name = This->strvalues[StringValue_QualifiedName].str;
3156 *len = This->strvalues[StringValue_QualifiedName].len;
3158 else
3160 *name = attribute->localname.str;
3161 *len = attribute->localname.len;
3163 break;
3164 default:
3165 *name = This->strvalues[StringValue_QualifiedName].str;
3166 *len = This->strvalues[StringValue_QualifiedName].len;
3167 break;
3170 return S_OK;
3173 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3175 if (list_empty(&reader->nsdef))
3176 return NULL;
3178 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3181 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3183 xmlreader *This = impl_from_IXmlReader(iface);
3184 const strval *prefix = &This->strvalues[StringValue_Prefix];
3185 XmlNodeType nodetype;
3186 struct ns *ns;
3187 UINT length;
3189 TRACE("(%p %p %p)\n", iface, uri, len);
3191 if (!len)
3192 len = &length;
3194 switch ((nodetype = reader_get_nodetype(This)))
3196 case XmlNodeType_Attribute:
3197 reader_get_attribute_ns_uri(This, This->attr, uri, len);
3198 break;
3199 case XmlNodeType_Element:
3200 case XmlNodeType_EndElement:
3202 ns = reader_lookup_ns(This, prefix);
3204 /* pick top default ns if any */
3205 if (!ns)
3206 ns = reader_lookup_nsdef(This);
3208 if (ns) {
3209 *uri = ns->uri.str;
3210 *len = ns->uri.len;
3212 else {
3213 *uri = emptyW;
3214 *len = 0;
3217 break;
3218 case XmlNodeType_Text:
3219 case XmlNodeType_CDATA:
3220 case XmlNodeType_ProcessingInstruction:
3221 case XmlNodeType_Comment:
3222 case XmlNodeType_Whitespace:
3223 case XmlNodeType_XmlDeclaration:
3224 *uri = emptyW;
3225 *len = 0;
3226 break;
3227 default:
3228 FIXME("Unhandled node type %d\n", nodetype);
3229 *uri = NULL;
3230 *len = 0;
3231 return E_NOTIMPL;
3234 return S_OK;
3237 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3239 xmlreader *This = impl_from_IXmlReader(iface);
3240 struct element *element;
3241 UINT length;
3243 TRACE("(%p)->(%p %p)\n", This, name, len);
3245 if (!len)
3246 len = &length;
3248 switch (reader_get_nodetype(This))
3250 case XmlNodeType_Text:
3251 case XmlNodeType_CDATA:
3252 case XmlNodeType_Comment:
3253 case XmlNodeType_Whitespace:
3254 *name = emptyW;
3255 *len = 0;
3256 break;
3257 case XmlNodeType_Element:
3258 case XmlNodeType_EndElement:
3259 element = reader_get_element(This);
3260 *name = element->localname.str;
3261 *len = element->localname.len;
3262 break;
3263 case XmlNodeType_Attribute:
3264 reader_get_attribute_local_name(This, This->attr, name, len);
3265 break;
3266 default:
3267 *name = This->strvalues[StringValue_LocalName].str;
3268 *len = This->strvalues[StringValue_LocalName].len;
3269 break;
3272 return S_OK;
3275 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3277 xmlreader *This = impl_from_IXmlReader(iface);
3278 XmlNodeType nodetype;
3279 UINT length;
3281 TRACE("(%p)->(%p %p)\n", This, ret, len);
3283 if (!len)
3284 len = &length;
3286 *ret = emptyW;
3287 *len = 0;
3289 switch ((nodetype = reader_get_nodetype(This)))
3291 case XmlNodeType_Element:
3292 case XmlNodeType_EndElement:
3293 case XmlNodeType_Attribute:
3295 const strval *prefix = &This->strvalues[StringValue_Prefix];
3296 struct ns *ns;
3298 if (strval_eq(This, prefix, &strval_xml))
3300 *ret = xmlW;
3301 *len = 3;
3303 else if (strval_eq(This, prefix, &strval_xmlns))
3305 *ret = xmlnsW;
3306 *len = 5;
3308 else if ((ns = reader_lookup_ns(This, prefix)))
3310 *ret = ns->prefix.str;
3311 *len = ns->prefix.len;
3314 break;
3316 default:
3320 return S_OK;
3323 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3325 strval *val;
3327 switch (reader_get_nodetype(reader))
3329 case XmlNodeType_XmlDeclaration:
3330 case XmlNodeType_EndElement:
3331 case XmlNodeType_None:
3332 return &strval_empty;
3333 case XmlNodeType_Attribute:
3334 /* For namespace definition attributes return values from namespace list */
3335 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3337 struct ns *ns;
3339 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3340 ns = reader_lookup_nsdef(reader);
3342 return &ns->uri;
3344 return &reader->attr->value;
3345 default:
3346 break;
3349 val = &reader->strvalues[StringValue_Value];
3350 if (!val->str && ensure_allocated)
3352 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3353 if (!ptr) return NULL;
3354 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3355 ptr[val->len] = 0;
3356 val->str = ptr;
3359 return val;
3362 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3364 xmlreader *reader = impl_from_IXmlReader(iface);
3365 const strval *val = &reader->strvalues[StringValue_Value];
3366 UINT off;
3368 TRACE("(%p)->(%p %p)\n", reader, value, len);
3370 *value = NULL;
3372 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3374 XmlNodeType type;
3375 HRESULT hr;
3377 hr = IXmlReader_Read(iface, &type);
3378 if (FAILED(hr)) return hr;
3380 /* return if still pending, partially read values are not reported */
3381 if (is_reader_pending(reader)) return E_PENDING;
3384 val = reader_get_value(reader, TRUE);
3385 if (!val)
3386 return E_OUTOFMEMORY;
3388 off = abs(reader->chunk_read_off);
3389 assert(off <= val->len);
3390 *value = val->str + off;
3391 if (len) *len = val->len - off;
3392 reader->chunk_read_off = -off;
3393 return S_OK;
3396 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3398 xmlreader *reader = impl_from_IXmlReader(iface);
3399 const strval *val;
3400 UINT len = 0;
3402 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3404 val = reader_get_value(reader, FALSE);
3406 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3407 if (reader->chunk_read_off >= 0)
3409 assert(reader->chunk_read_off <= val->len);
3410 len = min(val->len - reader->chunk_read_off, chunk_size);
3412 if (read) *read = len;
3414 if (len)
3416 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3417 reader->chunk_read_off += len;
3420 return len || !chunk_size ? S_OK : S_FALSE;
3423 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3424 LPCWSTR *baseUri,
3425 UINT *baseUri_length)
3427 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3428 return E_NOTIMPL;
3431 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3433 FIXME("(%p): stub\n", iface);
3434 return FALSE;
3437 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3439 xmlreader *This = impl_from_IXmlReader(iface);
3440 TRACE("(%p)\n", This);
3441 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3442 when current node is start tag of an element */
3443 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3446 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3448 xmlreader *This = impl_from_IXmlReader(iface);
3449 const struct element *element;
3451 TRACE("(%p %p)\n", This, line_number);
3453 if (!line_number)
3454 return E_INVALIDARG;
3456 switch (reader_get_nodetype(This))
3458 case XmlNodeType_Element:
3459 case XmlNodeType_EndElement:
3460 element = reader_get_element(This);
3461 *line_number = element->position.line_number;
3462 break;
3463 case XmlNodeType_Attribute:
3464 *line_number = This->attr->position.line_number;
3465 break;
3466 case XmlNodeType_Whitespace:
3467 case XmlNodeType_XmlDeclaration:
3468 *line_number = This->empty_element.position.line_number;
3469 break;
3470 default:
3471 *line_number = This->position.line_number;
3472 break;
3475 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3478 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3480 xmlreader *This = impl_from_IXmlReader(iface);
3481 const struct element *element;
3483 TRACE("(%p %p)\n", This, line_position);
3485 if (!line_position)
3486 return E_INVALIDARG;
3488 switch (reader_get_nodetype(This))
3490 case XmlNodeType_Element:
3491 case XmlNodeType_EndElement:
3492 element = reader_get_element(This);
3493 *line_position = element->position.line_position;
3494 break;
3495 case XmlNodeType_Attribute:
3496 *line_position = This->attr->position.line_position;
3497 break;
3498 case XmlNodeType_Whitespace:
3499 case XmlNodeType_XmlDeclaration:
3500 *line_position = This->empty_element.position.line_position;
3501 break;
3502 default:
3503 *line_position = This->position.line_position;
3504 break;
3507 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3510 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3512 xmlreader *This = impl_from_IXmlReader(iface);
3514 TRACE("(%p)->(%p)\n", This, count);
3516 if (!count) return E_INVALIDARG;
3518 *count = This->attr_count;
3519 return S_OK;
3522 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3524 xmlreader *This = impl_from_IXmlReader(iface);
3525 TRACE("(%p)->(%p)\n", This, depth);
3526 *depth = This->depth;
3527 return S_OK;
3530 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3532 xmlreader *This = impl_from_IXmlReader(iface);
3533 TRACE("(%p)\n", iface);
3534 return This->state == XmlReadState_EndOfFile;
3537 static const struct IXmlReaderVtbl xmlreader_vtbl =
3539 xmlreader_QueryInterface,
3540 xmlreader_AddRef,
3541 xmlreader_Release,
3542 xmlreader_SetInput,
3543 xmlreader_GetProperty,
3544 xmlreader_SetProperty,
3545 xmlreader_Read,
3546 xmlreader_GetNodeType,
3547 xmlreader_MoveToFirstAttribute,
3548 xmlreader_MoveToNextAttribute,
3549 xmlreader_MoveToAttributeByName,
3550 xmlreader_MoveToElement,
3551 xmlreader_GetQualifiedName,
3552 xmlreader_GetNamespaceUri,
3553 xmlreader_GetLocalName,
3554 xmlreader_GetPrefix,
3555 xmlreader_GetValue,
3556 xmlreader_ReadValueChunk,
3557 xmlreader_GetBaseUri,
3558 xmlreader_IsDefault,
3559 xmlreader_IsEmptyElement,
3560 xmlreader_GetLineNumber,
3561 xmlreader_GetLinePosition,
3562 xmlreader_GetAttributeCount,
3563 xmlreader_GetDepth,
3564 xmlreader_IsEOF
3567 /** IXmlReaderInput **/
3568 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3570 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3572 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3574 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3575 IsEqualGUID(riid, &IID_IUnknown))
3577 *ppvObject = iface;
3579 else
3581 WARN("interface %s not implemented\n", debugstr_guid(riid));
3582 *ppvObject = NULL;
3583 return E_NOINTERFACE;
3586 IUnknown_AddRef(iface);
3588 return S_OK;
3591 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3593 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3594 ULONG ref = InterlockedIncrement(&This->ref);
3595 TRACE("(%p)->(%d)\n", This, ref);
3596 return ref;
3599 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3601 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3602 LONG ref = InterlockedDecrement(&This->ref);
3604 TRACE("(%p)->(%d)\n", This, ref);
3606 if (ref == 0)
3608 IMalloc *imalloc = This->imalloc;
3609 if (This->input) IUnknown_Release(This->input);
3610 if (This->stream) ISequentialStream_Release(This->stream);
3611 if (This->buffer) free_input_buffer(This->buffer);
3612 readerinput_free(This, This->baseuri);
3613 readerinput_free(This, This);
3614 if (imalloc) IMalloc_Release(imalloc);
3617 return ref;
3620 static const struct IUnknownVtbl xmlreaderinputvtbl =
3622 xmlreaderinput_QueryInterface,
3623 xmlreaderinput_AddRef,
3624 xmlreaderinput_Release
3627 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3629 xmlreader *reader;
3630 HRESULT hr;
3631 int i;
3633 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3635 if (imalloc)
3636 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3637 else
3638 reader = heap_alloc(sizeof(*reader));
3639 if (!reader)
3640 return E_OUTOFMEMORY;
3642 memset(reader, 0, sizeof(*reader));
3643 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3644 reader->ref = 1;
3645 reader->state = XmlReadState_Closed;
3646 reader->instate = XmlReadInState_Initial;
3647 reader->resumestate = XmlReadResumeState_Initial;
3648 reader->dtdmode = DtdProcessing_Prohibit;
3649 reader->imalloc = imalloc;
3650 if (imalloc) IMalloc_AddRef(imalloc);
3651 reader->nodetype = XmlNodeType_None;
3652 list_init(&reader->attrs);
3653 list_init(&reader->nsdef);
3654 list_init(&reader->ns);
3655 list_init(&reader->elements);
3656 reader->max_depth = 256;
3658 reader->chunk_read_off = 0;
3659 for (i = 0; i < StringValue_Last; i++)
3660 reader->strvalues[i] = strval_empty;
3662 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3663 IXmlReader_Release(&reader->IXmlReader_iface);
3665 TRACE("returning iface %p, hr %#x\n", *obj, hr);
3667 return hr;
3670 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3671 IMalloc *imalloc,
3672 LPCWSTR encoding,
3673 BOOL hint,
3674 LPCWSTR base_uri,
3675 IXmlReaderInput **ppInput)
3677 xmlreaderinput *readerinput;
3678 HRESULT hr;
3680 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3681 hint, wine_dbgstr_w(base_uri), ppInput);
3683 if (!stream || !ppInput) return E_INVALIDARG;
3685 if (imalloc)
3686 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3687 else
3688 readerinput = heap_alloc(sizeof(*readerinput));
3689 if(!readerinput) return E_OUTOFMEMORY;
3691 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3692 readerinput->ref = 1;
3693 readerinput->imalloc = imalloc;
3694 readerinput->stream = NULL;
3695 if (imalloc) IMalloc_AddRef(imalloc);
3696 readerinput->encoding = parse_encoding_name(encoding, -1);
3697 readerinput->hint = hint;
3698 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3699 readerinput->pending = 0;
3701 hr = alloc_input_buffer(readerinput);
3702 if (hr != S_OK)
3704 readerinput_free(readerinput, readerinput->baseuri);
3705 readerinput_free(readerinput, readerinput);
3706 if (imalloc) IMalloc_Release(imalloc);
3707 return hr;
3709 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3711 *ppInput = &readerinput->IXmlReaderInput_iface;
3713 TRACE("returning iface %p\n", *ppInput);
3715 return S_OK;