winepulse: Remove AudioSessionManager.
[wine.git] / dlls / xmllite / reader.c
blob1e146493aa636d732f8bc261e17e37c3a7384b8a
1 /*
2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #define COBJMACROS
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
/* Internal reader state, stored in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    /* optional Misc at the end of a document */
    XmlReadInState_MiscEnd,
    XmlReadInState_Eof
} XmlReaderInternalState;
/* Records where parsing was interrupted by an input problem, so that the
   reader can resume parsing from that construct. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
/* Index of a saved input offset used to resume from a particular input
   position; see the xmlreader.resume[] array. */
typedef enum
{
    /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Name = 0,
    /* local for QName */
    XmlReadResume_Local,
    /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Body,
    /* number of slots, not a slot itself */
    XmlReadResume_Last
} XmlReaderResume;
/* Index into xmlreader.strvalues[]. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    /* number of slots, not a slot itself */
    StringValue_Last
} XmlReaderStringValue;
86 BOOL is_namestartchar(WCHAR ch);
88 static const char *debugstr_nodetype(XmlNodeType nodetype)
90 static const char * const type_names[] =
92 "None",
93 "Element",
94 "Attribute",
95 "Text",
96 "CDATA",
97 "",
98 "",
99 "ProcessingInstruction",
100 "Comment",
102 "DocumentType",
105 "Whitespace",
107 "EndElement",
109 "XmlDeclaration"
112 if (nodetype > _XmlNodeType_Last)
113 return wine_dbg_sprintf("unknown type=%d", nodetype);
115 return type_names[nodetype];
118 static const char *debugstr_reader_prop(XmlReaderProperty prop)
120 static const char * const prop_names[] =
122 "MultiLanguage",
123 "ConformanceLevel",
124 "RandomAccess",
125 "XmlResolver",
126 "DtdProcessing",
127 "ReadState",
128 "MaxElementDepth",
129 "MaxEntityExpansion"
132 if (prop > _XmlReaderProperty_Last)
133 return wine_dbg_sprintf("unknown property=%d", prop);
135 return prop_names[prop];
138 struct xml_encoding_data
140 const WCHAR *name;
141 xml_encoding enc;
142 UINT cp;
145 static const struct xml_encoding_data xml_encoding_map[] =
147 { L"US-ASCII", XmlEncoding_USASCII, 20127 },
148 { L"UTF-16", XmlEncoding_UTF16, 1200 },
149 { L"UTF-8", XmlEncoding_UTF8, CP_UTF8 },
152 const WCHAR *get_encoding_name(xml_encoding encoding)
154 return xml_encoding_map[encoding].name;
157 xml_encoding get_encoding_from_codepage(UINT codepage)
159 int i;
160 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
162 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
164 return XmlEncoding_Unknown;
167 typedef struct
169 char *data;
170 UINT cur;
171 unsigned int allocated;
172 unsigned int written;
173 BOOL prev_cr;
174 } encoded_buffer;
176 typedef struct input_buffer input_buffer;
178 typedef struct
180 IXmlReaderInput IXmlReaderInput_iface;
181 LONG ref;
182 /* reference passed on IXmlReaderInput creation, is kept when input is created */
183 IUnknown *input;
184 IMalloc *imalloc;
185 xml_encoding encoding;
186 BOOL hint;
187 WCHAR *baseuri;
188 /* stream reference set after SetInput() call from reader,
189 stored as sequential stream, cause currently
190 optimizations possible with IStream aren't implemented */
191 ISequentialStream *stream;
192 input_buffer *buffer;
193 unsigned int pending : 1;
194 } xmlreaderinput;
196 static const struct IUnknownVtbl xmlreaderinputvtbl;
198 /* Structure to hold parsed string of specific length.
200 Reader stores node value as 'start' pointer, on request
201 a null-terminated version of it is allocated.
203 To init a strval variable use reader_init_strval(),
204 to set strval as a reader value use reader_set_strval().
206 typedef struct
208 WCHAR *str; /* allocated null-terminated string */
209 UINT len; /* length in WCHARs, altered after ReadValueChunk */
210 UINT start; /* input position where value starts */
211 } strval;
213 static WCHAR emptyW[] = L"";
214 static WCHAR xmlW[] = L"xml";
215 static WCHAR xmlnsW[] = L"xmlns";
216 static const strval strval_empty = { emptyW, 0 };
217 static const strval strval_xml = { xmlW, 3 };
218 static const strval strval_xmlns = { xmlnsW, 5 };
220 struct reader_position
222 UINT line_number;
223 UINT line_position;
/* Bit flags stored in struct attribute.flags. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION         = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
232 struct attribute
234 struct list entry;
235 strval prefix;
236 strval localname;
237 strval qname;
238 strval value;
239 struct reader_position position;
240 unsigned int flags;
243 struct element
245 struct list entry;
246 strval prefix;
247 strval localname;
248 strval qname;
249 struct reader_position position;
252 struct ns
254 struct list entry;
255 strval prefix;
256 strval uri;
257 struct element *element;
260 typedef struct
262 IXmlReader IXmlReader_iface;
263 LONG ref;
264 xmlreaderinput *input;
265 IMalloc *imalloc;
266 XmlReadState state;
267 HRESULT error; /* error set on XmlReadState_Error */
268 XmlReaderInternalState instate;
269 XmlReaderResumeState resumestate;
270 XmlNodeType nodetype;
271 DtdProcessing dtdmode;
272 IXmlResolver *resolver;
273 IUnknown *mlang;
274 struct reader_position position;
275 struct list attrs; /* attributes list for current node */
276 struct attribute *attr; /* current attribute */
277 UINT attr_count;
278 struct list nsdef;
279 struct list ns;
280 struct list elements;
281 int chunk_read_off;
282 strval strvalues[StringValue_Last];
283 UINT depth;
284 UINT max_depth;
285 BOOL is_empty_element;
286 struct element empty_element; /* used for empty elements without end tag <a />,
287 and to keep <?xml reader position */
288 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
289 } xmlreader;
291 struct input_buffer
293 encoded_buffer utf16;
294 encoded_buffer encoded;
295 UINT code_page;
296 xmlreaderinput *input;
299 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
301 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
304 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
306 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
309 /* reader memory allocation functions */
310 static inline void *reader_alloc(xmlreader *reader, size_t len)
312 return m_alloc(reader->imalloc, len);
315 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
317 void *ret = reader_alloc(reader, len);
318 if (ret)
319 memset(ret, 0, len);
320 return ret;
323 static inline void reader_free(xmlreader *reader, void *mem)
325 m_free(reader->imalloc, mem);
328 /* Just return pointer from offset, no attempt to read more. */
329 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
331 encoded_buffer *buffer = &reader->input->buffer->utf16;
332 return (WCHAR*)buffer->data + offset;
335 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
337 return v->str ? v->str : reader_get_ptr2(reader, v->start);
340 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
342 *dest = *src;
344 if (src->str != strval_empty.str)
346 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
347 if (!dest->str) return E_OUTOFMEMORY;
348 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
349 dest->str[dest->len] = 0;
350 dest->start = 0;
353 return S_OK;
356 /* reader input memory allocation functions */
357 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
359 return m_alloc(input->imalloc, len);
362 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
364 return m_realloc(input->imalloc, mem, len);
367 static inline void readerinput_free(xmlreaderinput *input, void *mem)
369 m_free(input->imalloc, mem);
372 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
374 LPWSTR ret = NULL;
376 if(str) {
377 DWORD size;
379 size = (lstrlenW(str)+1)*sizeof(WCHAR);
380 ret = readerinput_alloc(input, size);
381 if (ret) memcpy(ret, str, size);
384 return ret;
387 /* This one frees stored string value if needed */
388 static void reader_free_strvalued(xmlreader *reader, strval *v)
390 if (v->str != strval_empty.str)
392 reader_free(reader, v->str);
393 *v = strval_empty;
397 static void reader_clear_attrs(xmlreader *reader)
399 struct attribute *attr, *attr2;
400 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
402 reader_free_strvalued(reader, &attr->localname);
403 reader_free_strvalued(reader, &attr->value);
404 reader_free(reader, attr);
406 list_init(&reader->attrs);
407 reader->attr_count = 0;
408 reader->attr = NULL;
411 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
412 while we are on a node with attributes */
413 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
414 strval *value, const struct reader_position *position, unsigned int flags)
416 struct attribute *attr;
417 HRESULT hr;
419 attr = reader_alloc(reader, sizeof(*attr));
420 if (!attr) return E_OUTOFMEMORY;
422 hr = reader_strvaldup(reader, localname, &attr->localname);
423 if (hr == S_OK)
425 hr = reader_strvaldup(reader, value, &attr->value);
426 if (hr != S_OK)
427 reader_free_strvalued(reader, &attr->localname);
429 if (hr != S_OK)
431 reader_free(reader, attr);
432 return hr;
435 if (prefix)
436 attr->prefix = *prefix;
437 else
438 memset(&attr->prefix, 0, sizeof(attr->prefix));
439 attr->qname = qname ? *qname : *localname;
440 attr->position = *position;
441 attr->flags = flags;
442 list_add_tail(&reader->attrs, &attr->entry);
443 reader->attr_count++;
445 return S_OK;
448 /* Returns current element, doesn't check if reader is actually positioned on it. */
449 static struct element *reader_get_element(xmlreader *reader)
451 if (reader->is_empty_element)
452 return &reader->empty_element;
454 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
457 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
459 v->start = start;
460 v->len = len;
461 v->str = NULL;
464 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
466 return debugstr_wn(reader_get_strptr(reader, v), v->len);
469 /* used to initialize from constant string */
470 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
472 v->start = 0;
473 v->len = len;
474 v->str = str;
477 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
479 reader_free_strvalued(reader, &reader->strvalues[type]);
482 static void reader_free_strvalues(xmlreader *reader)
484 int type;
485 for (type = 0; type < StringValue_Last; type++)
486 reader_free_strvalue(reader, type);
489 /* This helper should only be used to test if strings are the same,
490 it doesn't try to sort. */
491 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
493 if (str1->len != str2->len) return 0;
494 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
497 static void reader_clear_elements(xmlreader *reader)
499 struct element *elem, *elem2;
500 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
502 reader_free_strvalued(reader, &elem->prefix);
503 reader_free_strvalued(reader, &elem->localname);
504 reader_free_strvalued(reader, &elem->qname);
505 reader_free(reader, elem);
507 list_init(&reader->elements);
508 reader_free_strvalued(reader, &reader->empty_element.localname);
509 reader_free_strvalued(reader, &reader->empty_element.qname);
510 reader->is_empty_element = FALSE;
513 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
515 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
516 struct ns *ns;
518 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
519 if (strval_eq(reader, prefix, &ns->prefix))
520 return ns;
523 return NULL;
526 static HRESULT reader_inc_depth(xmlreader *reader)
528 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
531 static void reader_dec_depth(xmlreader *reader)
533 if (reader->depth)
534 reader->depth--;
537 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
539 struct ns *ns;
540 HRESULT hr;
542 ns = reader_alloc(reader, sizeof(*ns));
543 if (!ns) return E_OUTOFMEMORY;
545 if (def)
546 memset(&ns->prefix, 0, sizeof(ns->prefix));
547 else {
548 hr = reader_strvaldup(reader, prefix, &ns->prefix);
549 if (FAILED(hr)) {
550 reader_free(reader, ns);
551 return hr;
555 hr = reader_strvaldup(reader, uri, &ns->uri);
556 if (FAILED(hr)) {
557 reader_free_strvalued(reader, &ns->prefix);
558 reader_free(reader, ns);
559 return hr;
562 ns->element = NULL;
563 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
564 return hr;
567 static void reader_free_element(xmlreader *reader, struct element *element)
569 reader_free_strvalued(reader, &element->prefix);
570 reader_free_strvalued(reader, &element->localname);
571 reader_free_strvalued(reader, &element->qname);
572 reader_free(reader, element);
575 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
577 struct ns *ns;
579 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
580 if (ns->element)
581 break;
582 ns->element = element;
585 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
586 if (ns->element)
587 break;
588 ns->element = element;
592 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
593 strval *qname, const struct reader_position *position)
595 struct element *element;
596 HRESULT hr;
598 element = reader_alloc_zero(reader, sizeof(*element));
599 if (!element)
600 return E_OUTOFMEMORY;
602 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
603 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
604 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
606 list_add_head(&reader->elements, &element->entry);
607 reader_mark_ns_nodes(reader, element);
608 reader->is_empty_element = FALSE;
609 element->position = *position;
611 else
612 reader_free_element(reader, element);
614 return hr;
617 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
619 struct ns *ns, *ns2;
621 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
622 if (ns->element != element)
623 break;
625 list_remove(&ns->entry);
626 reader_free_strvalued(reader, &ns->prefix);
627 reader_free_strvalued(reader, &ns->uri);
628 reader_free(reader, ns);
631 if (!list_empty(&reader->nsdef)) {
632 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
633 if (ns->element == element) {
634 list_remove(&ns->entry);
635 reader_free_strvalued(reader, &ns->prefix);
636 reader_free_strvalued(reader, &ns->uri);
637 reader_free(reader, ns);
642 static void reader_pop_element(xmlreader *reader)
644 struct element *element;
646 if (list_empty(&reader->elements))
647 return;
649 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
650 list_remove(&element->entry);
652 reader_pop_ns_nodes(reader, element);
653 reader_free_element(reader, element);
655 /* It was a root element, the rest is expected as Misc */
656 if (list_empty(&reader->elements))
657 reader->instate = XmlReadInState_MiscEnd;
660 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
661 means node value is to be determined. */
662 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
664 strval *v = &reader->strvalues[type];
666 reader_free_strvalue(reader, type);
667 if (!value)
669 v->str = NULL;
670 v->start = 0;
671 v->len = 0;
672 return;
675 if (value->str == strval_empty.str)
676 *v = *value;
677 else
679 if (type == StringValue_Value)
681 /* defer allocation for value string */
682 v->str = NULL;
683 v->start = value->start;
684 v->len = value->len;
686 else
688 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
689 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
690 v->str[value->len] = 0;
691 v->len = value->len;
696 static inline int is_reader_pending(xmlreader *reader)
698 return reader->input->pending;
701 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
703 const int initial_len = 0x2000;
704 buffer->data = readerinput_alloc(input, initial_len);
705 if (!buffer->data) return E_OUTOFMEMORY;
707 memset(buffer->data, 0, 4);
708 buffer->cur = 0;
709 buffer->allocated = initial_len;
710 buffer->written = 0;
711 buffer->prev_cr = FALSE;
713 return S_OK;
716 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
718 readerinput_free(input, buffer->data);
721 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
723 if (encoding == XmlEncoding_Unknown)
725 FIXME("unsupported encoding %d\n", encoding);
726 return E_NOTIMPL;
729 *cp = xml_encoding_map[encoding].cp;
731 return S_OK;
734 xml_encoding parse_encoding_name(const WCHAR *name, int len)
736 int min, max, n, c;
738 if (!name) return XmlEncoding_Unknown;
740 min = 0;
741 max = ARRAY_SIZE(xml_encoding_map) - 1;
743 while (min <= max)
745 n = (min+max)/2;
747 if (len != -1)
748 c = wcsnicmp(xml_encoding_map[n].name, name, len);
749 else
750 c = wcsicmp(xml_encoding_map[n].name, name);
751 if (!c)
752 return xml_encoding_map[n].enc;
754 if (c > 0)
755 max = n-1;
756 else
757 min = n+1;
760 return XmlEncoding_Unknown;
763 static HRESULT alloc_input_buffer(xmlreaderinput *input)
765 input_buffer *buffer;
766 HRESULT hr;
768 input->buffer = NULL;
770 buffer = readerinput_alloc(input, sizeof(*buffer));
771 if (!buffer) return E_OUTOFMEMORY;
773 buffer->input = input;
774 buffer->code_page = ~0; /* code page is unknown at this point */
775 hr = init_encoded_buffer(input, &buffer->utf16);
776 if (hr != S_OK) {
777 readerinput_free(input, buffer);
778 return hr;
781 hr = init_encoded_buffer(input, &buffer->encoded);
782 if (hr != S_OK) {
783 free_encoded_buffer(input, &buffer->utf16);
784 readerinput_free(input, buffer);
785 return hr;
788 input->buffer = buffer;
789 return S_OK;
792 static void free_input_buffer(input_buffer *buffer)
794 free_encoded_buffer(buffer->input, &buffer->encoded);
795 free_encoded_buffer(buffer->input, &buffer->utf16);
796 readerinput_free(buffer->input, buffer);
799 static void readerinput_release_stream(xmlreaderinput *readerinput)
801 if (readerinput->stream) {
802 ISequentialStream_Release(readerinput->stream);
803 readerinput->stream = NULL;
807 /* Queries already stored interface for IStream/ISequentialStream.
808 Interface supplied on creation will be overwritten */
809 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
811 HRESULT hr;
813 readerinput_release_stream(readerinput);
814 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
815 if (hr != S_OK)
816 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
818 return hr;
821 /* reads a chunk to raw buffer */
822 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
824 encoded_buffer *buffer = &readerinput->buffer->encoded;
825 /* to make sure aligned length won't exceed allocated length */
826 ULONG len = buffer->allocated - buffer->written - 4;
827 ULONG read;
828 HRESULT hr;
830 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
831 variable width encodings like UTF-8 */
832 len = (len + 3) & ~3;
833 /* try to use allocated space or grow */
834 if (buffer->allocated - buffer->written < len)
836 buffer->allocated *= 2;
837 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
838 len = buffer->allocated - buffer->written;
841 read = 0;
842 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
843 TRACE("written=%d, alloc=%d, requested=%ld, read=%ld, ret=%#lx\n", buffer->written, buffer->allocated, len, read, hr);
844 readerinput->pending = hr == E_PENDING;
845 if (FAILED(hr)) return hr;
846 buffer->written += read;
847 if (!buffer->written)
848 return MX_E_INPUTEND;
850 return hr;
853 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
854 static void readerinput_grow(xmlreaderinput *readerinput, int length)
856 encoded_buffer *buffer = &readerinput->buffer->utf16;
858 length *= sizeof(WCHAR);
859 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
860 if (buffer->allocated < buffer->written + length + 4)
862 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
863 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
864 buffer->allocated = grown_size;
868 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
870 static const char startA[] = {'<','?'};
871 static const char commentA[] = {'<','!'};
872 encoded_buffer *buffer = &readerinput->buffer->encoded;
873 unsigned char *ptr = (unsigned char*)buffer->data;
875 return !memcmp(buffer->data, startA, sizeof(startA)) ||
876 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
877 /* test start byte */
878 (ptr[0] == '<' &&
880 (ptr[1] && (ptr[1] <= 0x7f)) ||
881 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
882 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
883 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
887 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
889 encoded_buffer *buffer = &readerinput->buffer->encoded;
890 static const char utf8bom[] = {0xef,0xbb,0xbf};
891 static const char utf16lebom[] = {0xff,0xfe};
892 WCHAR *ptrW;
894 *enc = XmlEncoding_Unknown;
896 if (buffer->written <= 3)
898 HRESULT hr = readerinput_growraw(readerinput);
899 if (FAILED(hr)) return hr;
900 if (buffer->written < 3) return MX_E_INPUTEND;
903 ptrW = (WCHAR *)buffer->data;
904 /* try start symbols if we have enough data to do that, input buffer should contain
905 first chunk already */
906 if (readerinput_is_utf8(readerinput))
907 *enc = XmlEncoding_UTF8;
908 else if (*ptrW == '<')
910 ptrW++;
911 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
912 *enc = XmlEncoding_UTF16;
914 /* try with BOM now */
915 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
917 buffer->cur += sizeof(utf8bom);
918 *enc = XmlEncoding_UTF8;
920 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
922 buffer->cur += sizeof(utf16lebom);
923 *enc = XmlEncoding_UTF16;
926 return S_OK;
929 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
931 encoded_buffer *buffer = &readerinput->buffer->encoded;
932 int len = buffer->written;
934 assert(len);
936 /* complete single byte char */
937 if (!(buffer->data[len-1] & 0x80)) return len;
939 /* find start byte of multibyte char */
940 while (--len && !(buffer->data[len] & 0xc0))
943 return len;
946 /* Returns byte length of complete char sequence for buffer code page,
947 it's relative to current buffer position which is currently used for BOM handling
948 only. */
949 static int readerinput_get_convlen(xmlreaderinput *readerinput)
951 encoded_buffer *buffer = &readerinput->buffer->encoded;
952 int len;
954 if (readerinput->buffer->code_page == CP_UTF8)
955 len = readerinput_get_utf8_convlen(readerinput);
956 else
957 len = buffer->written;
959 TRACE("%d\n", len - buffer->cur);
960 return len - buffer->cur;
963 /* It's possible that raw buffer has some leftovers from last conversion - some char
964 sequence that doesn't represent a full code point. Length argument should be calculated with
965 readerinput_get_convlen(), if it's -1 it will be calculated here. */
966 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
968 encoded_buffer *buffer = &readerinput->buffer->encoded;
970 if (len == -1)
971 len = readerinput_get_convlen(readerinput);
973 assert(len >= 0);
974 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
975 /* everything below cur is lost too */
976 buffer->written -= len + buffer->cur;
977 /* after this point we don't need cur offset really,
978 it's used only to mark where actual data begins when first chunk is read */
979 buffer->cur = 0;
982 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
984 BOOL prev_cr = buffer->prev_cr;
985 const WCHAR *src;
986 WCHAR *dest;
988 src = dest = (WCHAR*)buffer->data + off;
989 while ((const char*)src < buffer->data + buffer->written)
991 if (*src == '\r')
993 *dest++ = '\n';
994 src++;
995 prev_cr = TRUE;
996 continue;
998 if(prev_cr && *src == '\n')
999 src++;
1000 else
1001 *dest++ = *src++;
1002 prev_cr = FALSE;
1005 buffer->written = (char*)dest - buffer->data;
1006 buffer->prev_cr = prev_cr;
1007 *dest = 0;
1010 /* note that raw buffer content is kept */
1011 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1013 encoded_buffer *src = &readerinput->buffer->encoded;
1014 encoded_buffer *dest = &readerinput->buffer->utf16;
1015 int len, dest_len;
1016 UINT cp = ~0u;
1017 HRESULT hr;
1018 WCHAR *ptr;
1020 hr = get_code_page(enc, &cp);
1021 if (FAILED(hr)) return;
1023 readerinput->buffer->code_page = cp;
1024 len = readerinput_get_convlen(readerinput);
1026 TRACE("switching to cp %d\n", cp);
1028 /* just copy in this case */
1029 if (enc == XmlEncoding_UTF16)
1031 readerinput_grow(readerinput, len);
1032 memcpy(dest->data, src->data + src->cur, len);
1033 dest->written += len;
1035 else
1037 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1038 readerinput_grow(readerinput, dest_len);
1039 ptr = (WCHAR*)dest->data;
1040 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1041 ptr[dest_len] = 0;
1042 dest->written += dest_len*sizeof(WCHAR);
1045 fixup_buffer_cr(dest, 0);
1048 /* shrinks parsed data a buffer begins with */
1049 static void reader_shrink(xmlreader *reader)
1051 encoded_buffer *buffer = &reader->input->buffer->utf16;
1053 /* avoid to move too often using threshold shrink length */
1054 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1056 buffer->written -= buffer->cur*sizeof(WCHAR);
1057 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1058 buffer->cur = 0;
1059 *(WCHAR*)&buffer->data[buffer->written] = 0;
1063 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1064 It won't attempt to shrink but will grow destination buffer if needed */
1065 static HRESULT reader_more(xmlreader *reader)
1067 xmlreaderinput *readerinput = reader->input;
1068 encoded_buffer *src = &readerinput->buffer->encoded;
1069 encoded_buffer *dest = &readerinput->buffer->utf16;
1070 UINT cp = readerinput->buffer->code_page;
1071 int len, dest_len, prev_len;
1072 HRESULT hr;
1073 WCHAR *ptr;
1075 /* get some raw data from stream first */
1076 if (FAILED(hr = readerinput_growraw(readerinput)))
1077 return hr;
1079 len = readerinput_get_convlen(readerinput);
1080 prev_len = dest->written / sizeof(WCHAR);
1082 /* just copy for UTF-16 case */
1083 if (cp == 1200)
1085 readerinput_grow(readerinput, len);
1086 memcpy(dest->data + dest->written, src->data + src->cur, len);
1087 dest->written += len;
1089 else
1091 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1092 readerinput_grow(readerinput, dest_len);
1093 ptr = (WCHAR*)(dest->data + dest->written);
1094 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1095 ptr[dest_len] = 0;
1096 dest->written += dest_len*sizeof(WCHAR);
1097 /* get rid of processed data */
1098 readerinput_shrinkraw(readerinput, len);
1101 fixup_buffer_cr(dest, prev_len);
1102 return hr;
1105 static inline UINT reader_get_cur(xmlreader *reader)
1107 return reader->input->buffer->utf16.cur;
1110 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1112 encoded_buffer *buffer = &reader->input->buffer->utf16;
1113 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1114 if (!*ptr) reader_more(reader);
1115 return (WCHAR*)buffer->data + buffer->cur;
1118 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1120 int i=0;
1121 const WCHAR *ptr = reader_get_ptr(reader);
1122 while (str[i])
1124 if (!ptr[i])
1126 reader_more(reader);
1127 ptr = reader_get_ptr(reader);
1129 if (str[i] != ptr[i])
1130 return ptr[i] - str[i];
1131 i++;
1133 return 0;
1136 static void reader_update_position(xmlreader *reader, WCHAR ch)
1138 if (ch == '\r')
1139 reader->position.line_position = 1;
1140 else if (ch == '\n')
1142 reader->position.line_number++;
1143 reader->position.line_position = 1;
1145 else
1146 reader->position.line_position++;
1149 /* moves cursor n WCHARs forward */
1150 static void reader_skipn(xmlreader *reader, int n)
1152 encoded_buffer *buffer = &reader->input->buffer->utf16;
1153 const WCHAR *ptr;
1155 while (*(ptr = reader_get_ptr(reader)) && n--)
1157 reader_update_position(reader, *ptr);
1158 buffer->cur++;
1162 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1163 static int reader_skipspaces(xmlreader *reader)
1165 const WCHAR *ptr = reader_get_ptr(reader);
1166 UINT start = reader_get_cur(reader);
1168 while (is_wchar_space(*ptr))
1170 reader_skipn(reader, 1);
1171 ptr = reader_get_ptr(reader);
1174 return reader_get_cur(reader) - start;
1177 /* [26] VersionNum ::= '1.' [0-9]+ */
1178 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1180 WCHAR *ptr, *ptr2;
1181 UINT start;
1183 if (reader_cmp(reader, L"1.")) return WC_E_XMLDECL;
1185 start = reader_get_cur(reader);
1186 /* skip "1." */
1187 reader_skipn(reader, 2);
1189 ptr2 = ptr = reader_get_ptr(reader);
1190 while (*ptr >= '0' && *ptr <= '9')
1192 reader_skipn(reader, 1);
1193 ptr = reader_get_ptr(reader);
1196 if (ptr2 == ptr) return WC_E_DIGIT;
1197 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1198 TRACE("version=%s\n", debug_strval(reader, val));
1199 return S_OK;
1202 /* [25] Eq ::= S? '=' S? */
1203 static HRESULT reader_parse_eq(xmlreader *reader)
1205 reader_skipspaces(reader);
1206 if (reader_cmp(reader, L"=")) return WC_E_EQUAL;
1207 /* skip '=' */
1208 reader_skipn(reader, 1);
1209 reader_skipspaces(reader);
1210 return S_OK;
1213 static BOOL reader_is_quote(xmlreader *reader)
1215 return !reader_cmp(reader, L"\'") || !reader_cmp(reader, L"\"");
1218 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1219 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1221 struct reader_position position;
1222 strval val, name;
1223 HRESULT hr;
1225 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1227 position = reader->position;
1228 if (reader_cmp(reader, L"version")) return WC_E_XMLDECL;
1229 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1230 /* skip 'version' */
1231 reader_skipn(reader, 7);
1233 hr = reader_parse_eq(reader);
1234 if (FAILED(hr)) return hr;
1236 if (!reader_is_quote(reader))
1237 return WC_E_QUOTE;
1238 /* skip "'"|'"' */
1239 reader_skipn(reader, 1);
1241 hr = reader_parse_versionnum(reader, &val);
1242 if (FAILED(hr)) return hr;
1244 if (!reader_is_quote(reader))
1245 return WC_E_QUOTE;
1247 /* skip "'"|'"' */
1248 reader_skipn(reader, 1);
1250 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1253 /* ([A-Za-z0-9._] | '-') */
1254 static inline BOOL is_wchar_encname(WCHAR ch)
1256 return ((ch >= 'A' && ch <= 'Z') ||
1257 (ch >= 'a' && ch <= 'z') ||
1258 (ch >= '0' && ch <= '9') ||
1259 (ch == '.') || (ch == '_') ||
1260 (ch == '-'));
1263 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1264 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1266 WCHAR *start = reader_get_ptr(reader), *ptr;
1267 xml_encoding enc;
1268 int len;
1270 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1271 return WC_E_ENCNAME;
1273 val->start = reader_get_cur(reader);
1275 ptr = start;
1276 while (is_wchar_encname(*++ptr))
1279 len = ptr - start;
1280 enc = parse_encoding_name(start, len);
1281 TRACE("encoding name %s\n", debugstr_wn(start, len));
1282 val->str = start;
1283 val->len = len;
1285 if (enc == XmlEncoding_Unknown)
1286 return WC_E_ENCNAME;
1288 /* skip encoding name */
1289 reader_skipn(reader, len);
1290 return S_OK;
1293 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1294 static HRESULT reader_parse_encdecl(xmlreader *reader, BOOL *spaces)
1296 struct reader_position position;
1297 strval name, val;
1298 HRESULT hr;
1300 if (!(*spaces = reader_skipspaces(reader))) return S_FALSE;
1302 position = reader->position;
1303 if (reader_cmp(reader, L"encoding")) return S_FALSE;
1304 name.str = reader_get_ptr(reader);
1305 name.start = reader_get_cur(reader);
1306 name.len = 8;
1307 /* skip 'encoding' */
1308 reader_skipn(reader, 8);
1310 hr = reader_parse_eq(reader);
1311 if (FAILED(hr)) return hr;
1313 if (!reader_is_quote(reader))
1314 return WC_E_QUOTE;
1315 /* skip "'"|'"' */
1316 reader_skipn(reader, 1);
1318 hr = reader_parse_encname(reader, &val);
1319 if (FAILED(hr)) return hr;
1321 if (!reader_is_quote(reader))
1322 return WC_E_QUOTE;
1324 /* skip "'"|'"' */
1325 reader_skipn(reader, 1);
1326 *spaces = FALSE;
1328 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1331 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1332 static HRESULT reader_parse_sddecl(xmlreader *reader, BOOL spaces)
1334 struct reader_position position;
1335 strval name, val;
1336 UINT start;
1337 HRESULT hr;
1339 if (!spaces && !reader_skipspaces(reader)) return S_FALSE;
1341 position = reader->position;
1342 if (reader_cmp(reader, L"standalone")) return S_FALSE;
1343 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1344 /* skip 'standalone' */
1345 reader_skipn(reader, 10);
1347 hr = reader_parse_eq(reader);
1348 if (FAILED(hr)) return hr;
1350 if (!reader_is_quote(reader))
1351 return WC_E_QUOTE;
1352 /* skip "'"|'"' */
1353 reader_skipn(reader, 1);
1355 if (reader_cmp(reader, L"yes") && reader_cmp(reader, L"no"))
1356 return WC_E_XMLDECL;
1358 start = reader_get_cur(reader);
1359 /* skip 'yes'|'no' */
1360 reader_skipn(reader, reader_cmp(reader, L"yes") ? 2 : 3);
1361 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1362 TRACE("standalone=%s\n", debug_strval(reader, &val));
1364 if (!reader_is_quote(reader))
1365 return WC_E_QUOTE;
1366 /* skip "'"|'"' */
1367 reader_skipn(reader, 1);
1369 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1372 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1373 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1375 struct reader_position position;
1376 BOOL spaces;
1377 HRESULT hr;
1379 if (reader_cmp(reader, L"<?xml "))
1380 return S_FALSE;
1382 reader_skipn(reader, 2);
1383 position = reader->position;
1384 reader_skipn(reader, 3);
1385 hr = reader_parse_versioninfo(reader);
1386 if (FAILED(hr))
1387 return hr;
1389 if (FAILED(hr = reader_parse_encdecl(reader, &spaces)))
1390 return hr;
1392 if (FAILED(hr = reader_parse_sddecl(reader, spaces)))
1393 return hr;
1395 reader_skipspaces(reader);
1396 if (reader_cmp(reader, L"?>"))
1397 return WC_E_XMLDECL;
1399 /* skip '?>' */
1400 reader_skipn(reader, 2);
1402 reader->nodetype = XmlNodeType_XmlDeclaration;
1403 reader->empty_element.position = position;
1404 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1405 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1407 return S_OK;
1410 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1411 static HRESULT reader_parse_comment(xmlreader *reader)
1413 WCHAR *ptr;
1414 UINT start;
1416 if (reader->resumestate == XmlReadResumeState_Comment)
1418 start = reader->resume[XmlReadResume_Body];
1419 ptr = reader_get_ptr(reader);
1421 else
1423 /* skip '<!--' */
1424 reader_skipn(reader, 4);
1425 reader_shrink(reader);
1426 ptr = reader_get_ptr(reader);
1427 start = reader_get_cur(reader);
1428 reader->nodetype = XmlNodeType_Comment;
1429 reader->resume[XmlReadResume_Body] = start;
1430 reader->resumestate = XmlReadResumeState_Comment;
1431 reader_set_strvalue(reader, StringValue_Value, NULL);
1434 /* will exit when there's no more data, it won't attempt to
1435 read more from stream */
1436 while (*ptr)
1438 if (ptr[0] == '-')
1440 if (ptr[1] == '-')
1442 if (ptr[2] == '>')
1444 strval value;
1446 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1447 TRACE("%s\n", debug_strval(reader, &value));
1449 /* skip rest of markup '->' */
1450 reader_skipn(reader, 3);
1452 reader_set_strvalue(reader, StringValue_Value, &value);
1453 reader->resume[XmlReadResume_Body] = 0;
1454 reader->resumestate = XmlReadResumeState_Initial;
1455 return S_OK;
1457 else
1458 return WC_E_COMMENT;
1462 reader_skipn(reader, 1);
1463 ptr++;
1466 return S_OK;
1469 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1470 BOOL is_pubchar(WCHAR ch)
1472 return (ch == ' ') ||
1473 (ch >= 'a' && ch <= 'z') ||
1474 (ch >= 'A' && ch <= 'Z') ||
1475 (ch >= '0' && ch <= '9') ||
1476 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1477 (ch == '=') || (ch == '?') ||
1478 (ch == '@') || (ch == '!') ||
1479 (ch >= '#' && ch <= '%') || /* #$% */
1480 (ch == '_') || (ch == '\r') || (ch == '\n');
1483 BOOL is_namestartchar(WCHAR ch)
1485 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1486 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1487 (ch >= 0xc0 && ch <= 0xd6) ||
1488 (ch >= 0xd8 && ch <= 0xf6) ||
1489 (ch >= 0xf8 && ch <= 0x2ff) ||
1490 (ch >= 0x370 && ch <= 0x37d) ||
1491 (ch >= 0x37f && ch <= 0x1fff) ||
1492 (ch >= 0x200c && ch <= 0x200d) ||
1493 (ch >= 0x2070 && ch <= 0x218f) ||
1494 (ch >= 0x2c00 && ch <= 0x2fef) ||
1495 (ch >= 0x3001 && ch <= 0xd7ff) ||
1496 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1497 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1498 (ch >= 0xf900 && ch <= 0xfdcf) ||
1499 (ch >= 0xfdf0 && ch <= 0xfffd);
1502 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1503 BOOL is_ncnamechar(WCHAR ch)
1505 return (ch >= 'A' && ch <= 'Z') ||
1506 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1507 (ch == '-') || (ch == '.') ||
1508 (ch >= '0' && ch <= '9') ||
1509 (ch == 0xb7) ||
1510 (ch >= 0xc0 && ch <= 0xd6) ||
1511 (ch >= 0xd8 && ch <= 0xf6) ||
1512 (ch >= 0xf8 && ch <= 0x2ff) ||
1513 (ch >= 0x300 && ch <= 0x36f) ||
1514 (ch >= 0x370 && ch <= 0x37d) ||
1515 (ch >= 0x37f && ch <= 0x1fff) ||
1516 (ch >= 0x200c && ch <= 0x200d) ||
1517 (ch >= 0x203f && ch <= 0x2040) ||
1518 (ch >= 0x2070 && ch <= 0x218f) ||
1519 (ch >= 0x2c00 && ch <= 0x2fef) ||
1520 (ch >= 0x3001 && ch <= 0xd7ff) ||
1521 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1522 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1523 (ch >= 0xf900 && ch <= 0xfdcf) ||
1524 (ch >= 0xfdf0 && ch <= 0xfffd);
1527 BOOL is_namechar(WCHAR ch)
1529 return (ch == ':') || is_ncnamechar(ch);
1532 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1534 /* When we're on attribute always return attribute type, container node type is kept.
1535 Note that container is not necessarily an element, and attribute doesn't mean it's
1536 an attribute in XML spec terms. */
1537 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1540 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1541 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1542 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1543 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1544 [5] Name ::= NameStartChar (NameChar)* */
1545 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1547 WCHAR *ptr;
1548 UINT start;
1550 if (reader->resume[XmlReadResume_Name])
1552 start = reader->resume[XmlReadResume_Name];
1553 ptr = reader_get_ptr(reader);
1555 else
1557 ptr = reader_get_ptr(reader);
1558 start = reader_get_cur(reader);
1559 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1562 while (is_namechar(*ptr))
1564 reader_skipn(reader, 1);
1565 ptr = reader_get_ptr(reader);
1568 if (is_reader_pending(reader))
1570 reader->resume[XmlReadResume_Name] = start;
1571 return E_PENDING;
1573 else
1574 reader->resume[XmlReadResume_Name] = 0;
1576 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1577 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1579 return S_OK;
1582 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1583 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1585 static const strval xmlval = { (WCHAR *)L"xml", 3 };
1586 strval name;
1587 WCHAR *ptr;
1588 HRESULT hr;
1589 UINT i;
1591 hr = reader_parse_name(reader, &name);
1592 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1594 /* now that we got name check for illegal content */
1595 if (strval_eq(reader, &name, &xmlval))
1596 return WC_E_LEADINGXML;
1598 /* PITarget can't be a qualified name */
1599 ptr = reader_get_strptr(reader, &name);
1600 for (i = 0; i < name.len; i++)
1601 if (ptr[i] == ':')
1602 return i ? NC_E_NAMECOLON : WC_E_PI;
1604 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1605 *target = name;
1606 return S_OK;
1609 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1610 static HRESULT reader_parse_pi(xmlreader *reader)
1612 strval target;
1613 WCHAR *ptr;
1614 UINT start;
1615 HRESULT hr;
1617 switch (reader->resumestate)
1619 case XmlReadResumeState_Initial:
1620 /* skip '<?' */
1621 reader_skipn(reader, 2);
1622 reader_shrink(reader);
1623 reader->resumestate = XmlReadResumeState_PITarget;
1624 case XmlReadResumeState_PITarget:
1625 hr = reader_parse_pitarget(reader, &target);
1626 if (FAILED(hr)) return hr;
1627 reader_set_strvalue(reader, StringValue_LocalName, &target);
1628 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1629 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1630 reader->resumestate = XmlReadResumeState_PIBody;
1631 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1632 default:
1636 start = reader->resume[XmlReadResume_Body];
1637 ptr = reader_get_ptr(reader);
1638 while (*ptr)
1640 if (ptr[0] == '?')
1642 if (ptr[1] == '>')
1644 UINT cur = reader_get_cur(reader);
1645 strval value;
1647 /* strip all leading whitespace chars */
1648 while (start < cur)
1650 ptr = reader_get_ptr2(reader, start);
1651 if (!is_wchar_space(*ptr)) break;
1652 start++;
1655 reader_init_strvalue(start, cur-start, &value);
1657 /* skip '?>' */
1658 reader_skipn(reader, 2);
1659 TRACE("%s\n", debug_strval(reader, &value));
1660 reader->nodetype = XmlNodeType_ProcessingInstruction;
1661 reader->resumestate = XmlReadResumeState_Initial;
1662 reader->resume[XmlReadResume_Body] = 0;
1663 reader_set_strvalue(reader, StringValue_Value, &value);
1664 return S_OK;
1668 reader_skipn(reader, 1);
1669 ptr = reader_get_ptr(reader);
1672 return S_OK;
1675 /* This one is used to parse significant whitespace nodes, like in Misc production */
1676 static HRESULT reader_parse_whitespace(xmlreader *reader)
1678 switch (reader->resumestate)
1680 case XmlReadResumeState_Initial:
1681 reader_shrink(reader);
1682 reader->resumestate = XmlReadResumeState_Whitespace;
1683 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1684 reader->nodetype = XmlNodeType_Whitespace;
1685 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1686 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1687 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1688 /* fallthrough */
1689 case XmlReadResumeState_Whitespace:
1691 strval value;
1692 UINT start;
1694 reader_skipspaces(reader);
1695 if (is_reader_pending(reader)) return S_OK;
1697 start = reader->resume[XmlReadResume_Body];
1698 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1699 reader_set_strvalue(reader, StringValue_Value, &value);
1700 TRACE("%s\n", debug_strval(reader, &value));
1701 reader->resumestate = XmlReadResumeState_Initial;
1703 default:
1707 return S_OK;
1710 /* [27] Misc ::= Comment | PI | S */
1711 static HRESULT reader_parse_misc(xmlreader *reader)
1713 HRESULT hr = S_FALSE;
1715 if (reader->resumestate != XmlReadResumeState_Initial)
1717 hr = reader_more(reader);
1718 if (FAILED(hr)) return hr;
1720 /* finish current node */
1721 switch (reader->resumestate)
1723 case XmlReadResumeState_PITarget:
1724 case XmlReadResumeState_PIBody:
1725 return reader_parse_pi(reader);
1726 case XmlReadResumeState_Comment:
1727 return reader_parse_comment(reader);
1728 case XmlReadResumeState_Whitespace:
1729 return reader_parse_whitespace(reader);
1730 default:
1731 ERR("unknown resume state %d\n", reader->resumestate);
1735 while (1)
1737 const WCHAR *cur = reader_get_ptr(reader);
1739 if (is_wchar_space(*cur))
1740 hr = reader_parse_whitespace(reader);
1741 else if (!reader_cmp(reader, L"<!--"))
1742 hr = reader_parse_comment(reader);
1743 else if (!reader_cmp(reader, L"<?"))
1744 hr = reader_parse_pi(reader);
1745 else
1746 break;
1748 if (hr != S_FALSE) return hr;
1751 return hr;
1754 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1755 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1757 WCHAR *cur = reader_get_ptr(reader), quote;
1758 UINT start;
1760 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1762 quote = *cur;
1763 reader_skipn(reader, 1);
1765 cur = reader_get_ptr(reader);
1766 start = reader_get_cur(reader);
1767 while (is_char(*cur) && *cur != quote)
1769 reader_skipn(reader, 1);
1770 cur = reader_get_ptr(reader);
1772 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1773 if (*cur == quote) reader_skipn(reader, 1);
1775 TRACE("%s\n", debug_strval(reader, literal));
1776 return S_OK;
1779 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1780 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1781 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1783 WCHAR *cur = reader_get_ptr(reader), quote;
1784 UINT start;
1786 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1788 quote = *cur;
1789 reader_skipn(reader, 1);
1791 start = reader_get_cur(reader);
1792 cur = reader_get_ptr(reader);
1793 while (is_pubchar(*cur) && *cur != quote)
1795 reader_skipn(reader, 1);
1796 cur = reader_get_ptr(reader);
1798 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1799 if (*cur == quote) reader_skipn(reader, 1);
1801 TRACE("%s\n", debug_strval(reader, literal));
1802 return S_OK;
1805 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1806 static HRESULT reader_parse_externalid(xmlreader *reader)
1808 static WCHAR systemW[] = L"SYSTEM";
1809 static WCHAR publicW[] = L"PUBLIC";
1810 struct reader_position position = reader->position;
1811 strval name, sys;
1812 HRESULT hr;
1813 int cnt;
1815 if (!reader_cmp(reader, publicW)) {
1816 strval pub;
1818 /* public id */
1819 reader_skipn(reader, 6);
1820 cnt = reader_skipspaces(reader);
1821 if (!cnt) return WC_E_WHITESPACE;
1823 hr = reader_parse_pub_literal(reader, &pub);
1824 if (FAILED(hr)) return hr;
1826 reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
1827 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1828 if (FAILED(hr)) return hr;
1830 cnt = reader_skipspaces(reader);
1831 if (!cnt) return S_OK;
1833 /* optional system id */
1834 hr = reader_parse_sys_literal(reader, &sys);
1835 if (FAILED(hr)) return S_OK;
1837 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1838 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1839 if (FAILED(hr)) return hr;
1841 return S_OK;
1842 } else if (!reader_cmp(reader, systemW)) {
1843 /* system id */
1844 reader_skipn(reader, 6);
1845 cnt = reader_skipspaces(reader);
1846 if (!cnt) return WC_E_WHITESPACE;
1848 hr = reader_parse_sys_literal(reader, &sys);
1849 if (FAILED(hr)) return hr;
1851 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1852 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1855 return S_FALSE;
1858 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1859 static HRESULT reader_parse_dtd(xmlreader *reader)
1861 strval name;
1862 WCHAR *cur;
1863 HRESULT hr;
1865 if (reader_cmp(reader, L"<!DOCTYPE")) return S_FALSE;
1866 reader_shrink(reader);
1868 /* DTD processing is not allowed by default */
1869 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1871 reader_skipn(reader, 9);
1872 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1874 /* name */
1875 hr = reader_parse_name(reader, &name);
1876 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1878 reader_skipspaces(reader);
1880 hr = reader_parse_externalid(reader);
1881 if (FAILED(hr)) return hr;
1883 reader_skipspaces(reader);
1885 cur = reader_get_ptr(reader);
1886 if (*cur != '>')
1888 FIXME("internal subset parsing not implemented\n");
1889 return E_NOTIMPL;
1892 /* skip '>' */
1893 reader_skipn(reader, 1);
1895 reader->nodetype = XmlNodeType_DocumentType;
1896 reader_set_strvalue(reader, StringValue_LocalName, &name);
1897 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1899 return S_OK;
1902 /* [11 NS] LocalPart ::= NCName */
1903 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1905 WCHAR *ptr;
1906 UINT start;
1908 if (reader->resume[XmlReadResume_Local])
1910 start = reader->resume[XmlReadResume_Local];
1911 ptr = reader_get_ptr(reader);
1913 else
1915 ptr = reader_get_ptr(reader);
1916 start = reader_get_cur(reader);
1919 while (is_ncnamechar(*ptr))
1921 reader_skipn(reader, 1);
1922 ptr = reader_get_ptr(reader);
1925 if (check_for_separator && *ptr == ':')
1926 return NC_E_QNAMECOLON;
1928 if (is_reader_pending(reader))
1930 reader->resume[XmlReadResume_Local] = start;
1931 return E_PENDING;
1933 else
1934 reader->resume[XmlReadResume_Local] = 0;
1936 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1938 return S_OK;
1941 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1942 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1943 [9 NS] UnprefixedName ::= LocalPart
1944 [10 NS] Prefix ::= NCName */
1945 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1947 WCHAR *ptr;
1948 UINT start;
1949 HRESULT hr;
1951 if (reader->resume[XmlReadResume_Name])
1953 start = reader->resume[XmlReadResume_Name];
1954 ptr = reader_get_ptr(reader);
1956 else
1958 ptr = reader_get_ptr(reader);
1959 start = reader_get_cur(reader);
1960 reader->resume[XmlReadResume_Name] = start;
1961 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1964 if (reader->resume[XmlReadResume_Local])
1966 hr = reader_parse_local(reader, local, FALSE);
1967 if (FAILED(hr)) return hr;
1969 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1970 local->start - reader->resume[XmlReadResume_Name] - 1,
1971 prefix);
1973 else
1975 /* skip prefix part */
1976 while (is_ncnamechar(*ptr))
1978 reader_skipn(reader, 1);
1979 ptr = reader_get_ptr(reader);
1982 if (is_reader_pending(reader)) return E_PENDING;
1984 /* got a qualified name */
1985 if (*ptr == ':')
1987 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1989 /* skip ':' */
1990 reader_skipn(reader, 1);
1991 hr = reader_parse_local(reader, local, TRUE);
1992 if (FAILED(hr)) return hr;
1994 else
1996 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1997 reader_init_strvalue(0, 0, prefix);
2001 if (prefix->len)
2002 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2003 else
2004 TRACE("ncname %s\n", debug_strval(reader, local));
2006 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2007 /* count ':' too */
2008 (prefix->len ? prefix->len + 1 : 0) + local->len,
2009 qname);
2011 reader->resume[XmlReadResume_Name] = 0;
2012 reader->resume[XmlReadResume_Local] = 0;
2014 return S_OK;
2017 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2019 static const strval lt = { (WCHAR *)L"lt", 2 };
2020 static const strval gt = { (WCHAR *)L"gt", 2 };
2021 static const strval amp = { (WCHAR *)L"amp", 3 };
2022 static const strval apos = { (WCHAR *)L"apos", 4 };
2023 static const strval quot = { (WCHAR *)L"quot", 4 };
2024 WCHAR *str = reader_get_strptr(reader, name);
2026 switch (*str)
2028 case 'l':
2029 if (strval_eq(reader, name, &lt)) return '<';
2030 break;
2031 case 'g':
2032 if (strval_eq(reader, name, &gt)) return '>';
2033 break;
2034 case 'a':
2035 if (strval_eq(reader, name, &amp))
2036 return '&';
2037 else if (strval_eq(reader, name, &apos))
2038 return '\'';
2039 break;
2040 case 'q':
2041 if (strval_eq(reader, name, &quot)) return '\"';
2042 break;
2043 default:
2047 return 0;
2050 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2051 [67] Reference ::= EntityRef | CharRef
2052 [68] EntityRef ::= '&' Name ';' */
2053 static HRESULT reader_parse_reference(xmlreader *reader)
2055 encoded_buffer *buffer = &reader->input->buffer->utf16;
2056 WCHAR *start = reader_get_ptr(reader), *ptr;
2057 UINT cur = reader_get_cur(reader);
2058 WCHAR ch = 0;
2059 int len;
2061 /* skip '&' */
2062 reader_skipn(reader, 1);
2063 ptr = reader_get_ptr(reader);
2065 if (*ptr == '#')
2067 reader_skipn(reader, 1);
2068 ptr = reader_get_ptr(reader);
2070 /* hex char or decimal */
2071 if (*ptr == 'x')
2073 reader_skipn(reader, 1);
2074 ptr = reader_get_ptr(reader);
2076 while (*ptr != ';')
2078 if ((*ptr >= '0' && *ptr <= '9'))
2079 ch = ch*16 + *ptr - '0';
2080 else if ((*ptr >= 'a' && *ptr <= 'f'))
2081 ch = ch*16 + *ptr - 'a' + 10;
2082 else if ((*ptr >= 'A' && *ptr <= 'F'))
2083 ch = ch*16 + *ptr - 'A' + 10;
2084 else
2085 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2086 reader_skipn(reader, 1);
2087 ptr = reader_get_ptr(reader);
2090 else
2092 while (*ptr != ';')
2094 if ((*ptr >= '0' && *ptr <= '9'))
2096 ch = ch*10 + *ptr - '0';
2097 reader_skipn(reader, 1);
2098 ptr = reader_get_ptr(reader);
2100 else
2101 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2105 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2107 /* normalize */
2108 if (is_wchar_space(ch)) ch = ' ';
2110 ptr = reader_get_ptr(reader);
2111 start = reader_get_ptr2(reader, cur);
2112 len = buffer->written - ((char *)ptr - buffer->data);
2113 memmove(start + 1, ptr + 1, len);
2115 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2116 *(WCHAR*)(buffer->data + buffer->written) = 0;
2117 buffer->cur = cur + 1;
2119 *start = ch;
2121 else
2123 strval name;
2124 HRESULT hr;
2126 hr = reader_parse_name(reader, &name);
2127 if (FAILED(hr)) return hr;
2129 ptr = reader_get_ptr(reader);
2130 if (*ptr != ';') return WC_E_SEMICOLON;
2132 /* predefined entities resolve to a single character */
2133 ch = get_predefined_entity(reader, &name);
2134 if (ch)
2136 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2137 memmove(start+1, ptr+1, len);
2138 buffer->cur = cur + 1;
2139 buffer->written -= (ptr - start) * sizeof(WCHAR);
2140 *(WCHAR*)(buffer->data + buffer->written) = 0;
2142 *start = ch;
2144 else
2146 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2147 return WC_E_UNDECLAREDENTITY;
2152 return S_OK;
2155 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2156 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2158 WCHAR *ptr, quote;
2159 UINT start;
2161 ptr = reader_get_ptr(reader);
2163 /* skip opening quote */
2164 quote = *ptr;
2165 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2166 reader_skipn(reader, 1);
2168 ptr = reader_get_ptr(reader);
2169 start = reader_get_cur(reader);
2170 while (*ptr)
2172 if (*ptr == '<') return WC_E_LESSTHAN;
2174 if (*ptr == quote)
2176 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2177 /* skip closing quote */
2178 reader_skipn(reader, 1);
2179 return S_OK;
2182 if (*ptr == '&')
2184 HRESULT hr = reader_parse_reference(reader);
2185 if (FAILED(hr)) return hr;
2187 else
2189 /* replace all whitespace chars with ' ' */
2190 if (is_wchar_space(*ptr)) *ptr = ' ';
2191 reader_skipn(reader, 1);
2193 ptr = reader_get_ptr(reader);
2196 return WC_E_QUOTE;
2199 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2200 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2201 [3 NS] DefaultAttName ::= 'xmlns'
2202 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2203 static HRESULT reader_parse_attribute(xmlreader *reader)
2205 struct reader_position position = reader->position;
2206 strval prefix, local, qname, value;
2207 enum attribute_flags flags = 0;
2208 HRESULT hr;
2210 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2211 if (FAILED(hr)) return hr;
2213 if (strval_eq(reader, &prefix, &strval_xmlns))
2214 flags |= ATTRIBUTE_NS_DEFINITION;
2216 if (strval_eq(reader, &qname, &strval_xmlns))
2217 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2219 hr = reader_parse_eq(reader);
2220 if (FAILED(hr)) return hr;
2222 hr = reader_parse_attvalue(reader, &value);
2223 if (FAILED(hr)) return hr;
2225 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2226 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2228 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2229 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2232 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2233 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2234 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2236 struct reader_position position = reader->position;
2237 HRESULT hr;
2239 hr = reader_parse_qname(reader, prefix, local, qname);
2240 if (FAILED(hr)) return hr;
2242 for (;;)
2244 reader_skipspaces(reader);
2246 /* empty element */
2247 if ((reader->is_empty_element = !reader_cmp(reader, L"/>")))
2249 struct element *element = &reader->empty_element;
2251 /* skip '/>' */
2252 reader_skipn(reader, 2);
2254 reader_free_strvalued(reader, &element->qname);
2255 reader_free_strvalued(reader, &element->localname);
2257 element->prefix = *prefix;
2258 reader_strvaldup(reader, qname, &element->qname);
2259 reader_strvaldup(reader, local, &element->localname);
2260 element->position = position;
2261 reader_mark_ns_nodes(reader, element);
2262 return S_OK;
2265 /* got a start tag */
2266 if (!reader_cmp(reader, L">"))
2268 /* skip '>' */
2269 reader_skipn(reader, 1);
2270 return reader_push_element(reader, prefix, local, qname, &position);
2273 hr = reader_parse_attribute(reader);
2274 if (FAILED(hr)) return hr;
2277 return S_OK;
2280 /* [39] element ::= EmptyElemTag | STag content ETag */
2281 static HRESULT reader_parse_element(xmlreader *reader)
2283 HRESULT hr;
2285 switch (reader->resumestate)
2287 case XmlReadResumeState_Initial:
2288 /* check if we are really on element */
2289 if (reader_cmp(reader, L"<")) return S_FALSE;
2291 /* skip '<' */
2292 reader_skipn(reader, 1);
2294 reader_shrink(reader);
2295 reader->resumestate = XmlReadResumeState_STag;
2296 case XmlReadResumeState_STag:
2298 strval qname, prefix, local;
2300 /* this handles empty elements too */
2301 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2302 if (FAILED(hr)) return hr;
2304 /* FIXME: need to check for defined namespace to reject invalid prefix */
2306 /* if we got empty element and stack is empty go straight to Misc */
2307 if (reader->is_empty_element && list_empty(&reader->elements))
2308 reader->instate = XmlReadInState_MiscEnd;
2309 else
2310 reader->instate = XmlReadInState_Content;
2312 reader->nodetype = XmlNodeType_Element;
2313 reader->resumestate = XmlReadResumeState_Initial;
2314 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2315 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2316 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2317 break;
2319 default:
2320 hr = E_FAIL;
2323 return hr;
2326 /* [13 NS] ETag ::= '</' QName S? '>' */
2327 static HRESULT reader_parse_endtag(xmlreader *reader)
2329 struct reader_position position;
2330 strval prefix, local, qname;
2331 struct element *element;
2332 HRESULT hr;
2334 /* skip '</' */
2335 reader_skipn(reader, 2);
2337 position = reader->position;
2338 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2339 if (FAILED(hr)) return hr;
2341 reader_skipspaces(reader);
2343 if (reader_cmp(reader, L">")) return WC_E_GREATERTHAN;
2345 /* skip '>' */
2346 reader_skipn(reader, 1);
2348 /* Element stack should never be empty at this point, cause we shouldn't get to
2349 content parsing if it's empty. */
2350 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2351 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2353 /* update position stored for start tag, we won't be using it */
2354 element->position = position;
2356 reader->nodetype = XmlNodeType_EndElement;
2357 reader->is_empty_element = FALSE;
2358 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2360 return S_OK;
2363 /* [18] CDSect ::= CDStart CData CDEnd
2364 [19] CDStart ::= '<![CDATA['
2365 [20] CData ::= (Char* - (Char* ']]>' Char*))
2366 [21] CDEnd ::= ']]>' */
2367 static HRESULT reader_parse_cdata(xmlreader *reader)
2369 WCHAR *ptr;
2370 UINT start;
2372 if (reader->resumestate == XmlReadResumeState_CDATA)
2374 start = reader->resume[XmlReadResume_Body];
2375 ptr = reader_get_ptr(reader);
2377 else
2379 /* skip markup '<![CDATA[' */
2380 reader_skipn(reader, 9);
2381 reader_shrink(reader);
2382 ptr = reader_get_ptr(reader);
2383 start = reader_get_cur(reader);
2384 reader->nodetype = XmlNodeType_CDATA;
2385 reader->resume[XmlReadResume_Body] = start;
2386 reader->resumestate = XmlReadResumeState_CDATA;
2387 reader_set_strvalue(reader, StringValue_Value, NULL);
2390 while (*ptr)
2392 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2394 strval value;
2396 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2398 /* skip ']]>' */
2399 reader_skipn(reader, 3);
2400 TRACE("%s\n", debug_strval(reader, &value));
2402 reader_set_strvalue(reader, StringValue_Value, &value);
2403 reader->resume[XmlReadResume_Body] = 0;
2404 reader->resumestate = XmlReadResumeState_Initial;
2405 return S_OK;
2407 else
2409 reader_skipn(reader, 1);
2410 ptr = reader_get_ptr(reader);
2414 return S_OK;
2417 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2418 static HRESULT reader_parse_chardata(xmlreader *reader)
2420 struct reader_position position;
2421 WCHAR *ptr;
2422 UINT start;
2424 if (reader->resumestate == XmlReadResumeState_CharData)
2426 start = reader->resume[XmlReadResume_Body];
2427 ptr = reader_get_ptr(reader);
2429 else
2431 reader_shrink(reader);
2432 ptr = reader_get_ptr(reader);
2433 start = reader_get_cur(reader);
2434 /* There's no text */
2435 if (!*ptr || *ptr == '<') return S_OK;
2436 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2437 reader->resume[XmlReadResume_Body] = start;
2438 reader->resumestate = XmlReadResumeState_CharData;
2439 reader_set_strvalue(reader, StringValue_Value, NULL);
2442 position = reader->position;
2443 while (*ptr)
2445 /* CDATA closing sequence ']]>' is not allowed */
2446 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2447 return WC_E_CDSECTEND;
2449 /* Found next markup part */
2450 if (ptr[0] == '<')
2452 strval value;
2454 reader->empty_element.position = position;
2455 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2456 reader_set_strvalue(reader, StringValue_Value, &value);
2457 reader->resume[XmlReadResume_Body] = 0;
2458 reader->resumestate = XmlReadResumeState_Initial;
2459 return S_OK;
2462 /* this covers a case when text has leading whitespace chars */
2463 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2465 if (!reader_cmp(reader, L"&"))
2466 reader_parse_reference(reader);
2467 else
2468 reader_skipn(reader, 1);
2470 ptr = reader_get_ptr(reader);
2473 return S_OK;
2476 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2477 static HRESULT reader_parse_content(xmlreader *reader)
2479 if (reader->resumestate != XmlReadResumeState_Initial)
2481 switch (reader->resumestate)
2483 case XmlReadResumeState_CDATA:
2484 return reader_parse_cdata(reader);
2485 case XmlReadResumeState_Comment:
2486 return reader_parse_comment(reader);
2487 case XmlReadResumeState_PIBody:
2488 case XmlReadResumeState_PITarget:
2489 return reader_parse_pi(reader);
2490 case XmlReadResumeState_CharData:
2491 return reader_parse_chardata(reader);
2492 default:
2493 ERR("unknown resume state %d\n", reader->resumestate);
2497 reader_shrink(reader);
2499 /* handle end tag here, it indicates end of content as well */
2500 if (!reader_cmp(reader, L"</"))
2501 return reader_parse_endtag(reader);
2503 if (!reader_cmp(reader, L"<!--"))
2504 return reader_parse_comment(reader);
2506 if (!reader_cmp(reader, L"<?"))
2507 return reader_parse_pi(reader);
2509 if (!reader_cmp(reader, L"<![CDATA["))
2510 return reader_parse_cdata(reader);
2512 if (!reader_cmp(reader, L"<"))
2513 return reader_parse_element(reader);
2515 /* what's left must be CharData */
2516 return reader_parse_chardata(reader);
2519 static HRESULT reader_parse_nextnode(xmlreader *reader)
2521 XmlNodeType nodetype = reader_get_nodetype(reader);
2522 HRESULT hr;
2524 if (!is_reader_pending(reader))
2526 reader->chunk_read_off = 0;
2527 reader_clear_attrs(reader);
2530 /* When moving from EndElement or empty element, pop its own namespace definitions */
2531 switch (nodetype)
2533 case XmlNodeType_Attribute:
2534 reader_dec_depth(reader);
2535 /* fallthrough */
2536 case XmlNodeType_Element:
2537 if (reader->is_empty_element)
2538 reader_pop_ns_nodes(reader, &reader->empty_element);
2539 else if (FAILED(hr = reader_inc_depth(reader)))
2540 return hr;
2541 break;
2542 case XmlNodeType_EndElement:
2543 reader_pop_element(reader);
2544 reader_dec_depth(reader);
2545 break;
2546 default:
2550 for (;;)
2552 switch (reader->instate)
2554 /* if it's a first call for a new input we need to detect stream encoding */
2555 case XmlReadInState_Initial:
2557 xml_encoding enc;
2559 hr = readerinput_growraw(reader->input);
2560 if (FAILED(hr)) return hr;
2562 reader->position.line_number = 1;
2563 reader->position.line_position = 1;
2565 /* try to detect encoding by BOM or data and set input code page */
2566 hr = readerinput_detectencoding(reader->input, &enc);
2567 TRACE("detected encoding %s, %#lx.\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2568 debugstr_w(xml_encoding_map[enc].name), hr);
2569 if (FAILED(hr)) return hr;
2571 /* always switch first time cause we have to put something in */
2572 readerinput_switchencoding(reader->input, enc);
2574 /* parse xml declaration */
2575 hr = reader_parse_xmldecl(reader);
2576 if (FAILED(hr)) return hr;
2578 readerinput_shrinkraw(reader->input, -1);
2579 reader->instate = XmlReadInState_Misc_DTD;
2580 if (hr == S_OK) return hr;
2582 break;
2583 case XmlReadInState_Misc_DTD:
2584 hr = reader_parse_misc(reader);
2585 if (FAILED(hr)) return hr;
2587 if (hr == S_FALSE)
2588 reader->instate = XmlReadInState_DTD;
2589 else
2590 return hr;
2591 break;
2592 case XmlReadInState_DTD:
2593 hr = reader_parse_dtd(reader);
2594 if (FAILED(hr)) return hr;
2596 if (hr == S_OK)
2598 reader->instate = XmlReadInState_DTD_Misc;
2599 return hr;
2601 else
2602 reader->instate = XmlReadInState_Element;
2603 break;
2604 case XmlReadInState_DTD_Misc:
2605 hr = reader_parse_misc(reader);
2606 if (FAILED(hr)) return hr;
2608 if (hr == S_FALSE)
2609 reader->instate = XmlReadInState_Element;
2610 else
2611 return hr;
2612 break;
2613 case XmlReadInState_Element:
2614 return reader_parse_element(reader);
2615 case XmlReadInState_Content:
2616 return reader_parse_content(reader);
2617 case XmlReadInState_MiscEnd:
2618 hr = reader_parse_misc(reader);
2619 if (hr != S_FALSE) return hr;
2621 if (*reader_get_ptr(reader))
2623 WARN("found garbage in the end of XML\n");
2624 return WC_E_SYNTAX;
2627 reader->instate = XmlReadInState_Eof;
2628 reader->state = XmlReadState_EndOfFile;
2629 reader->nodetype = XmlNodeType_None;
2630 return hr;
2631 case XmlReadInState_Eof:
2632 return S_FALSE;
2633 default:
2634 FIXME("internal state %d not handled\n", reader->instate);
2635 return E_NOTIMPL;
2639 return E_NOTIMPL;
2642 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2644 TRACE("%p, %s, %p.\n", iface, debugstr_guid(riid), ppvObject);
2646 if (IsEqualGUID(riid, &IID_IUnknown) ||
2647 IsEqualGUID(riid, &IID_IXmlReader))
2649 *ppvObject = iface;
2651 else
2653 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2654 *ppvObject = NULL;
2655 return E_NOINTERFACE;
2658 IXmlReader_AddRef(iface);
2660 return S_OK;
2663 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2665 xmlreader *reader = impl_from_IXmlReader(iface);
2666 ULONG ref = InterlockedIncrement(&reader->ref);
2667 TRACE("%p, refcount %ld.\n", iface, ref);
2668 return ref;
2671 static void reader_clear_ns(xmlreader *reader)
2673 struct ns *ns, *ns2;
2675 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2676 list_remove(&ns->entry);
2677 reader_free_strvalued(reader, &ns->prefix);
2678 reader_free_strvalued(reader, &ns->uri);
2679 reader_free(reader, ns);
2682 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2683 list_remove(&ns->entry);
2684 reader_free_strvalued(reader, &ns->uri);
2685 reader_free(reader, ns);
2689 static void reader_reset_parser(xmlreader *reader)
2691 reader->position.line_number = 0;
2692 reader->position.line_position = 0;
2694 reader_clear_elements(reader);
2695 reader_clear_attrs(reader);
2696 reader_clear_ns(reader);
2697 reader_free_strvalues(reader);
2699 reader->depth = 0;
2700 reader->nodetype = XmlNodeType_None;
2701 reader->resumestate = XmlReadResumeState_Initial;
2702 memset(reader->resume, 0, sizeof(reader->resume));
2703 reader->is_empty_element = FALSE;
2706 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2708 xmlreader *This = impl_from_IXmlReader(iface);
2709 LONG ref = InterlockedDecrement(&This->ref);
2711 TRACE("%p, refcount %ld.\n", iface, ref);
2713 if (ref == 0)
2715 IMalloc *imalloc = This->imalloc;
2716 reader_reset_parser(This);
2717 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2718 if (This->resolver) IXmlResolver_Release(This->resolver);
2719 if (This->mlang) IUnknown_Release(This->mlang);
2720 reader_free(This, This);
2721 if (imalloc) IMalloc_Release(imalloc);
2724 return ref;
2727 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2729 xmlreader *This = impl_from_IXmlReader(iface);
2730 IXmlReaderInput *readerinput;
2731 HRESULT hr;
2733 TRACE("%p, %p.\n", iface, input);
2735 if (This->input)
2737 readerinput_release_stream(This->input);
2738 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2739 This->input = NULL;
2742 reader_reset_parser(This);
2744 /* just reset current input */
2745 if (!input)
2747 This->state = XmlReadState_Initial;
2748 return S_OK;
2751 /* now try IXmlReaderInput, ISequentialStream, IStream */
2752 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2753 if (hr == S_OK)
2755 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2756 This->input = impl_from_IXmlReaderInput(readerinput);
2757 else
2759 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2760 readerinput, readerinput->lpVtbl);
2761 IUnknown_Release(readerinput);
2762 return E_FAIL;
2767 if (hr != S_OK || !readerinput)
2769 /* create IXmlReaderInput basing on supplied interface */
2770 hr = CreateXmlReaderInputWithEncodingName(input,
2771 This->imalloc, NULL, FALSE, NULL, &readerinput);
2772 if (hr != S_OK) return hr;
2773 This->input = impl_from_IXmlReaderInput(readerinput);
2776 /* set stream for supplied IXmlReaderInput */
2777 hr = readerinput_query_for_stream(This->input);
2778 if (hr == S_OK)
2780 This->state = XmlReadState_Initial;
2781 This->instate = XmlReadInState_Initial;
2783 return hr;
2786 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2788 xmlreader *This = impl_from_IXmlReader(iface);
2790 TRACE("%p, %s, %p.\n", iface, debugstr_reader_prop(property), value);
2792 if (!value) return E_INVALIDARG;
2794 switch (property)
2796 case XmlReaderProperty_MultiLanguage:
2797 *value = (LONG_PTR)This->mlang;
2798 if (This->mlang)
2799 IUnknown_AddRef(This->mlang);
2800 break;
2801 case XmlReaderProperty_XmlResolver:
2802 *value = (LONG_PTR)This->resolver;
2803 if (This->resolver)
2804 IXmlResolver_AddRef(This->resolver);
2805 break;
2806 case XmlReaderProperty_DtdProcessing:
2807 *value = This->dtdmode;
2808 break;
2809 case XmlReaderProperty_ReadState:
2810 *value = This->state;
2811 break;
2812 case XmlReaderProperty_MaxElementDepth:
2813 *value = This->max_depth;
2814 break;
2815 default:
2816 FIXME("Unimplemented property (%u)\n", property);
2817 return E_NOTIMPL;
2820 return S_OK;
2823 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2825 xmlreader *This = impl_from_IXmlReader(iface);
2827 TRACE("%p, %s, %Ix.\n", iface, debugstr_reader_prop(property), value);
2829 switch (property)
2831 case XmlReaderProperty_MultiLanguage:
2832 if (This->mlang)
2833 IUnknown_Release(This->mlang);
2834 This->mlang = (IUnknown*)value;
2835 if (This->mlang)
2836 IUnknown_AddRef(This->mlang);
2837 if (This->mlang)
2838 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2839 break;
2840 case XmlReaderProperty_XmlResolver:
2841 if (This->resolver)
2842 IXmlResolver_Release(This->resolver);
2843 This->resolver = (IXmlResolver*)value;
2844 if (This->resolver)
2845 IXmlResolver_AddRef(This->resolver);
2846 break;
2847 case XmlReaderProperty_DtdProcessing:
2848 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2849 This->dtdmode = value;
2850 break;
2851 case XmlReaderProperty_MaxElementDepth:
2852 This->max_depth = value;
2853 break;
2854 default:
2855 FIXME("Unimplemented property (%u)\n", property);
2856 return E_NOTIMPL;
2859 return S_OK;
2862 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2864 xmlreader *This = impl_from_IXmlReader(iface);
2865 XmlNodeType oldtype = This->nodetype;
2866 XmlNodeType type;
2867 HRESULT hr;
2869 TRACE("%p, %p.\n", iface, nodetype);
2871 if (!nodetype)
2872 nodetype = &type;
2874 switch (This->state)
2876 case XmlReadState_Closed:
2877 hr = S_FALSE;
2878 break;
2879 case XmlReadState_Error:
2880 hr = This->error;
2881 break;
2882 default:
2883 hr = reader_parse_nextnode(This);
2884 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2885 This->state = XmlReadState_Interactive;
2887 if (FAILED(hr))
2889 This->state = XmlReadState_Error;
2890 This->nodetype = XmlNodeType_None;
2891 This->depth = 0;
2892 This->error = hr;
2896 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2897 *nodetype = This->nodetype;
2899 return hr;
2902 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2904 xmlreader *This = impl_from_IXmlReader(iface);
2906 TRACE("%p, %p.\n", iface, node_type);
2908 if (!node_type)
2909 return E_INVALIDARG;
2911 *node_type = reader_get_nodetype(This);
2912 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2915 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2917 if (!reader->attr)
2918 reader_inc_depth(reader);
2919 reader->attr = attr;
2920 reader->chunk_read_off = 0;
2921 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2922 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2923 reader_set_strvalue(reader, StringValue_Value, &attr->value);
2926 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2928 if (!reader->attr_count)
2929 return S_FALSE;
2931 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2933 return S_OK;
2936 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2938 xmlreader *This = impl_from_IXmlReader(iface);
2940 TRACE("%p.\n", iface);
2942 return reader_move_to_first_attribute(This);
2945 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2947 xmlreader *This = impl_from_IXmlReader(iface);
2948 const struct list *next;
2950 TRACE("%p.\n", iface);
2952 if (!This->attr_count) return S_FALSE;
2954 if (!This->attr)
2955 return reader_move_to_first_attribute(This);
2957 next = list_next(&This->attrs, &This->attr->entry);
2958 if (next)
2959 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
2961 return next ? S_OK : S_FALSE;
2964 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
2966 static const WCHAR xmlns_uriW[] = L"http://www.w3.org/2000/xmlns/";
2967 static const WCHAR xml_uriW[] = L"http://www.w3.org/XML/1998/namespace";
2969 /* Check for reserved prefixes first */
2970 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
2971 strval_eq(reader, &attr->prefix, &strval_xmlns))
2973 *uri = xmlns_uriW;
2974 *len = ARRAY_SIZE(xmlns_uriW) - 1;
2976 else if (strval_eq(reader, &attr->prefix, &strval_xml))
2978 *uri = xml_uriW;
2979 *len = ARRAY_SIZE(xml_uriW) - 1;
2981 else
2983 *uri = NULL;
2984 *len = 0;
2987 if (!*uri)
2989 struct ns *ns;
2991 if ((ns = reader_lookup_ns(reader, &attr->prefix)))
2993 *uri = ns->uri.str;
2994 *len = ns->uri.len;
2996 else
2998 *uri = emptyW;
2999 *len = 0;
3004 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3006 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3008 *name = xmlnsW;
3009 *len = 5;
3011 else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3013 const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3014 *name = ns->prefix.str;
3015 *len = ns->prefix.len;
3017 else
3019 *name = attr->localname.str;
3020 *len = attr->localname.len;
3024 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3025 const WCHAR *local_name, const WCHAR *namespace_uri)
3027 xmlreader *This = impl_from_IXmlReader(iface);
3028 UINT target_name_len, target_uri_len;
3029 struct attribute *attr;
3031 TRACE("%p, %s, %s.\n", iface, debugstr_w(local_name), debugstr_w(namespace_uri));
3033 if (!local_name)
3034 return E_INVALIDARG;
3036 if (!This->attr_count)
3037 return S_FALSE;
3039 if (!namespace_uri)
3040 namespace_uri = emptyW;
3042 target_name_len = lstrlenW(local_name);
3043 target_uri_len = lstrlenW(namespace_uri);
3045 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3047 UINT name_len, uri_len;
3048 const WCHAR *name, *uri;
3050 reader_get_attribute_local_name(This, attr, &name, &name_len);
3051 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3053 if (name_len == target_name_len && uri_len == target_uri_len &&
3054 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3056 reader_set_current_attribute(This, attr);
3057 return S_OK;
3061 return S_FALSE;
3064 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3066 xmlreader *This = impl_from_IXmlReader(iface);
3068 TRACE("%p.\n", iface);
3070 if (!This->attr_count) return S_FALSE;
3072 if (This->attr)
3073 reader_dec_depth(This);
3075 This->attr = NULL;
3077 /* FIXME: support other node types with 'attributes' like DTD */
3078 if (This->is_empty_element) {
3079 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3080 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3082 else {
3083 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3084 if (element) {
3085 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3086 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3089 This->chunk_read_off = 0;
3090 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3092 return S_OK;
3095 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3097 xmlreader *This = impl_from_IXmlReader(iface);
3098 struct attribute *attribute = This->attr;
3099 struct element *element;
3100 UINT length;
3102 TRACE("%p, %p, %p.\n", iface, name, len);
3104 if (!len)
3105 len = &length;
3107 switch (reader_get_nodetype(This))
3109 case XmlNodeType_Text:
3110 case XmlNodeType_CDATA:
3111 case XmlNodeType_Comment:
3112 case XmlNodeType_Whitespace:
3113 *name = emptyW;
3114 *len = 0;
3115 break;
3116 case XmlNodeType_Element:
3117 case XmlNodeType_EndElement:
3118 element = reader_get_element(This);
3119 if (element->prefix.len)
3121 *name = element->qname.str;
3122 *len = element->qname.len;
3124 else
3126 *name = element->localname.str;
3127 *len = element->localname.len;
3129 break;
3130 case XmlNodeType_Attribute:
3131 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3133 *name = xmlnsW;
3134 *len = 5;
3135 } else if (attribute->prefix.len)
3137 *name = This->strvalues[StringValue_QualifiedName].str;
3138 *len = This->strvalues[StringValue_QualifiedName].len;
3140 else
3142 *name = attribute->localname.str;
3143 *len = attribute->localname.len;
3145 break;
3146 default:
3147 *name = This->strvalues[StringValue_QualifiedName].str;
3148 *len = This->strvalues[StringValue_QualifiedName].len;
3149 break;
3152 return S_OK;
3155 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3157 if (list_empty(&reader->nsdef))
3158 return NULL;
3160 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3163 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3165 xmlreader *This = impl_from_IXmlReader(iface);
3166 const strval *prefix = &This->strvalues[StringValue_Prefix];
3167 XmlNodeType nodetype;
3168 struct ns *ns;
3169 UINT length;
3171 TRACE("%p, %p, %p.\n", iface, uri, len);
3173 if (!len)
3174 len = &length;
3176 switch ((nodetype = reader_get_nodetype(This)))
3178 case XmlNodeType_Attribute:
3179 reader_get_attribute_ns_uri(This, This->attr, uri, len);
3180 break;
3181 case XmlNodeType_Element:
3182 case XmlNodeType_EndElement:
3184 ns = reader_lookup_ns(This, prefix);
3186 /* pick top default ns if any */
3187 if (!ns)
3188 ns = reader_lookup_nsdef(This);
3190 if (ns) {
3191 *uri = ns->uri.str;
3192 *len = ns->uri.len;
3194 else {
3195 *uri = emptyW;
3196 *len = 0;
3199 break;
3200 case XmlNodeType_Text:
3201 case XmlNodeType_CDATA:
3202 case XmlNodeType_ProcessingInstruction:
3203 case XmlNodeType_Comment:
3204 case XmlNodeType_Whitespace:
3205 case XmlNodeType_XmlDeclaration:
3206 *uri = emptyW;
3207 *len = 0;
3208 break;
3209 default:
3210 FIXME("Unhandled node type %d\n", nodetype);
3211 *uri = NULL;
3212 *len = 0;
3213 return E_NOTIMPL;
3216 return S_OK;
3219 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3221 xmlreader *This = impl_from_IXmlReader(iface);
3222 struct element *element;
3223 UINT length;
3225 TRACE("%p, %p, %p.\n", iface, name, len);
3227 if (!len)
3228 len = &length;
3230 switch (reader_get_nodetype(This))
3232 case XmlNodeType_Text:
3233 case XmlNodeType_CDATA:
3234 case XmlNodeType_Comment:
3235 case XmlNodeType_Whitespace:
3236 *name = emptyW;
3237 *len = 0;
3238 break;
3239 case XmlNodeType_Element:
3240 case XmlNodeType_EndElement:
3241 element = reader_get_element(This);
3242 *name = element->localname.str;
3243 *len = element->localname.len;
3244 break;
3245 case XmlNodeType_Attribute:
3246 reader_get_attribute_local_name(This, This->attr, name, len);
3247 break;
3248 default:
3249 *name = This->strvalues[StringValue_LocalName].str;
3250 *len = This->strvalues[StringValue_LocalName].len;
3251 break;
3254 return S_OK;
3257 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3259 xmlreader *This = impl_from_IXmlReader(iface);
3260 XmlNodeType nodetype;
3261 UINT length;
3263 TRACE("%p, %p, %p.\n", iface, ret, len);
3265 if (!len)
3266 len = &length;
3268 *ret = emptyW;
3269 *len = 0;
3271 switch ((nodetype = reader_get_nodetype(This)))
3273 case XmlNodeType_Element:
3274 case XmlNodeType_EndElement:
3275 case XmlNodeType_Attribute:
3277 const strval *prefix = &This->strvalues[StringValue_Prefix];
3278 struct ns *ns;
3280 if (strval_eq(This, prefix, &strval_xml))
3282 *ret = xmlW;
3283 *len = 3;
3285 else if (strval_eq(This, prefix, &strval_xmlns))
3287 *ret = xmlnsW;
3288 *len = 5;
3290 else if ((ns = reader_lookup_ns(This, prefix)))
3292 *ret = ns->prefix.str;
3293 *len = ns->prefix.len;
3296 break;
3298 default:
3302 return S_OK;
3305 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3307 strval *val;
3309 switch (reader_get_nodetype(reader))
3311 case XmlNodeType_XmlDeclaration:
3312 case XmlNodeType_EndElement:
3313 case XmlNodeType_None:
3314 return &strval_empty;
3315 case XmlNodeType_Attribute:
3316 /* For namespace definition attributes return values from namespace list */
3317 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3319 struct ns *ns;
3321 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3322 ns = reader_lookup_nsdef(reader);
3324 return &ns->uri;
3326 return &reader->attr->value;
3327 default:
3328 break;
3331 val = &reader->strvalues[StringValue_Value];
3332 if (!val->str && ensure_allocated)
3334 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3335 if (!ptr) return NULL;
3336 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3337 ptr[val->len] = 0;
3338 val->str = ptr;
3341 return val;
3344 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3346 xmlreader *reader = impl_from_IXmlReader(iface);
3347 const strval *val = &reader->strvalues[StringValue_Value];
3348 UINT off;
3350 TRACE("%p, %p, %p.\n", iface, value, len);
3352 *value = NULL;
3354 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3356 XmlNodeType type;
3357 HRESULT hr;
3359 hr = IXmlReader_Read(iface, &type);
3360 if (FAILED(hr)) return hr;
3362 /* return if still pending, partially read values are not reported */
3363 if (is_reader_pending(reader)) return E_PENDING;
3366 val = reader_get_value(reader, TRUE);
3367 if (!val)
3368 return E_OUTOFMEMORY;
3370 off = abs(reader->chunk_read_off);
3371 assert(off <= val->len);
3372 *value = val->str + off;
3373 if (len) *len = val->len - off;
3374 reader->chunk_read_off = -off;
3375 return S_OK;
3378 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3380 xmlreader *reader = impl_from_IXmlReader(iface);
3381 const strval *val;
3382 UINT len = 0;
3384 TRACE("%p, %p, %u, %p.\n", iface, buffer, chunk_size, read);
3386 val = reader_get_value(reader, FALSE);
3388 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3389 if (reader->chunk_read_off >= 0)
3391 assert(reader->chunk_read_off <= val->len);
3392 len = min(val->len - reader->chunk_read_off, chunk_size);
3394 if (read) *read = len;
3396 if (len)
3398 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3399 reader->chunk_read_off += len;
3402 return len || !chunk_size ? S_OK : S_FALSE;
3405 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3406 LPCWSTR *baseUri,
3407 UINT *baseUri_length)
3409 FIXME("%p, %p, %p: stub\n", iface, baseUri, baseUri_length);
3410 return E_NOTIMPL;
3413 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3415 FIXME("%p: stub\n", iface);
3416 return FALSE;
3419 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3421 xmlreader *This = impl_from_IXmlReader(iface);
3423 TRACE("%p.\n", iface);
3425 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3426 when current node is start tag of an element */
3427 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3430 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3432 xmlreader *This = impl_from_IXmlReader(iface);
3433 const struct element *element;
3435 TRACE("%p, %p.\n", iface, line_number);
3437 if (!line_number)
3438 return E_INVALIDARG;
3440 switch (reader_get_nodetype(This))
3442 case XmlNodeType_Element:
3443 case XmlNodeType_EndElement:
3444 element = reader_get_element(This);
3445 *line_number = element->position.line_number;
3446 break;
3447 case XmlNodeType_Attribute:
3448 *line_number = This->attr->position.line_number;
3449 break;
3450 case XmlNodeType_Whitespace:
3451 case XmlNodeType_XmlDeclaration:
3452 *line_number = This->empty_element.position.line_number;
3453 break;
3454 default:
3455 *line_number = This->position.line_number;
3456 break;
3459 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3462 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3464 xmlreader *This = impl_from_IXmlReader(iface);
3465 const struct element *element;
3467 TRACE("%p, %p.\n", iface, line_position);
3469 if (!line_position)
3470 return E_INVALIDARG;
3472 switch (reader_get_nodetype(This))
3474 case XmlNodeType_Element:
3475 case XmlNodeType_EndElement:
3476 element = reader_get_element(This);
3477 *line_position = element->position.line_position;
3478 break;
3479 case XmlNodeType_Attribute:
3480 *line_position = This->attr->position.line_position;
3481 break;
3482 case XmlNodeType_Whitespace:
3483 case XmlNodeType_XmlDeclaration:
3484 *line_position = This->empty_element.position.line_position;
3485 break;
3486 default:
3487 *line_position = This->position.line_position;
3488 break;
3491 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3494 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3496 xmlreader *This = impl_from_IXmlReader(iface);
3498 TRACE("%p, %p.\n", iface, count);
3500 if (!count) return E_INVALIDARG;
3502 *count = This->attr_count;
3503 return S_OK;
3506 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3508 xmlreader *This = impl_from_IXmlReader(iface);
3510 TRACE("%p, %p.\n", iface, depth);
3512 *depth = This->depth;
3513 return S_OK;
3516 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3518 xmlreader *This = impl_from_IXmlReader(iface);
3520 TRACE("%p.\n", iface);
3522 return This->state == XmlReadState_EndOfFile;
3525 static const struct IXmlReaderVtbl xmlreader_vtbl =
3527 xmlreader_QueryInterface,
3528 xmlreader_AddRef,
3529 xmlreader_Release,
3530 xmlreader_SetInput,
3531 xmlreader_GetProperty,
3532 xmlreader_SetProperty,
3533 xmlreader_Read,
3534 xmlreader_GetNodeType,
3535 xmlreader_MoveToFirstAttribute,
3536 xmlreader_MoveToNextAttribute,
3537 xmlreader_MoveToAttributeByName,
3538 xmlreader_MoveToElement,
3539 xmlreader_GetQualifiedName,
3540 xmlreader_GetNamespaceUri,
3541 xmlreader_GetLocalName,
3542 xmlreader_GetPrefix,
3543 xmlreader_GetValue,
3544 xmlreader_ReadValueChunk,
3545 xmlreader_GetBaseUri,
3546 xmlreader_IsDefault,
3547 xmlreader_IsEmptyElement,
3548 xmlreader_GetLineNumber,
3549 xmlreader_GetLinePosition,
3550 xmlreader_GetAttributeCount,
3551 xmlreader_GetDepth,
3552 xmlreader_IsEOF
3555 /** IXmlReaderInput **/
3556 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3558 TRACE("%p, %s, %p.\n", iface, debugstr_guid(riid), ppvObject);
3560 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3561 IsEqualGUID(riid, &IID_IUnknown))
3563 *ppvObject = iface;
3565 else
3567 WARN("interface %s not implemented\n", debugstr_guid(riid));
3568 *ppvObject = NULL;
3569 return E_NOINTERFACE;
3572 IUnknown_AddRef(iface);
3574 return S_OK;
3577 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3579 xmlreaderinput *input = impl_from_IXmlReaderInput(iface);
3580 ULONG ref = InterlockedIncrement(&input->ref);
3581 TRACE("%p, refcount %ld.\n", iface, ref);
3582 return ref;
3585 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3587 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3588 LONG ref = InterlockedDecrement(&This->ref);
3590 TRACE("%p, refcount %ld.\n", iface, ref);
3592 if (ref == 0)
3594 IMalloc *imalloc = This->imalloc;
3595 if (This->input) IUnknown_Release(This->input);
3596 if (This->stream) ISequentialStream_Release(This->stream);
3597 if (This->buffer) free_input_buffer(This->buffer);
3598 readerinput_free(This, This->baseuri);
3599 readerinput_free(This, This);
3600 if (imalloc) IMalloc_Release(imalloc);
3603 return ref;
3606 static const struct IUnknownVtbl xmlreaderinputvtbl =
3608 xmlreaderinput_QueryInterface,
3609 xmlreaderinput_AddRef,
3610 xmlreaderinput_Release
3613 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3615 xmlreader *reader;
3616 HRESULT hr;
3617 int i;
3619 TRACE("%s, %p, %p.\n", wine_dbgstr_guid(riid), obj, imalloc);
3621 if (!(reader = m_alloc(imalloc, sizeof(*reader))))
3622 return E_OUTOFMEMORY;
3624 memset(reader, 0, sizeof(*reader));
3625 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3626 reader->ref = 1;
3627 reader->state = XmlReadState_Closed;
3628 reader->instate = XmlReadInState_Initial;
3629 reader->resumestate = XmlReadResumeState_Initial;
3630 reader->dtdmode = DtdProcessing_Prohibit;
3631 reader->imalloc = imalloc;
3632 if (imalloc) IMalloc_AddRef(imalloc);
3633 reader->nodetype = XmlNodeType_None;
3634 list_init(&reader->attrs);
3635 list_init(&reader->nsdef);
3636 list_init(&reader->ns);
3637 list_init(&reader->elements);
3638 reader->max_depth = 256;
3640 reader->chunk_read_off = 0;
3641 for (i = 0; i < StringValue_Last; i++)
3642 reader->strvalues[i] = strval_empty;
3644 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3645 IXmlReader_Release(&reader->IXmlReader_iface);
3647 TRACE("returning iface %p, hr %#lx.\n", *obj, hr);
3649 return hr;
3652 static HRESULT create_reader_input(IUnknown *stream, IMalloc *imalloc, xml_encoding encoding,
3653 BOOL hint, const WCHAR *base_uri, IXmlReaderInput **ppInput)
3655 xmlreaderinput *readerinput;
3656 HRESULT hr;
3658 if (!stream || !ppInput) return E_INVALIDARG;
3660 if (!(readerinput = m_alloc(imalloc, sizeof(*readerinput))))
3661 return E_OUTOFMEMORY;
3662 memset(readerinput, 0, sizeof(*readerinput));
3664 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3665 readerinput->ref = 1;
3666 readerinput->imalloc = imalloc;
3667 if (imalloc) IMalloc_AddRef(imalloc);
3668 readerinput->encoding = encoding;
3669 readerinput->hint = hint;
3670 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3672 hr = alloc_input_buffer(readerinput);
3673 if (hr != S_OK)
3675 readerinput_free(readerinput, readerinput->baseuri);
3676 readerinput_free(readerinput, readerinput);
3677 if (imalloc) IMalloc_Release(imalloc);
3678 return hr;
3680 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3682 *ppInput = &readerinput->IXmlReaderInput_iface;
3684 TRACE("returning iface %p\n", *ppInput);
3686 return S_OK;
3689 /***********************************************************************
3690 * CreateXmlReaderInputWithEncodingName (xmllite.@)
3692 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream, IMalloc *imalloc,
3693 const WCHAR *encoding, BOOL hint, const WCHAR *base_uri, IXmlReaderInput **input)
3695 TRACE("%p, %p, %s, %d, %s, %p.\n", stream, imalloc, wine_dbgstr_w(encoding),
3696 hint, wine_dbgstr_w(base_uri), input);
3698 return create_reader_input(stream, imalloc, parse_encoding_name(encoding, -1), hint, base_uri, input);
3701 /***********************************************************************
3702 * CreateXmlReaderInputWithEncodingCodePage (xmllite.@)
3704 HRESULT WINAPI CreateXmlReaderInputWithEncodingCodePage(IUnknown *stream, IMalloc *imalloc,
3705 UINT codepage, BOOL hint, const WCHAR *base_uri, IXmlReaderInput **input)
3707 TRACE("%p, %p, %u, %d, %s, %p.\n", stream, imalloc, codepage, hint, wine_dbgstr_w(base_uri), input);
3709 return create_reader_input(stream, imalloc, get_encoding_from_codepage(codepage), hint, base_uri, input);