2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW
[] = {'\"',0};
90 static const WCHAR quoteW
[] = {'\'',0};
91 static const WCHAR ltW
[] = {'<',0};
92 static const WCHAR gtW
[] = {'>',0};
93 static const WCHAR commentW
[] = {'<','!','-','-',0};
94 static const WCHAR piW
[] = {'<','?',0};
96 static BOOL
is_namestartchar(WCHAR ch
);
98 static const char *debugstr_nodetype(XmlNodeType nodetype
)
100 static const char * const type_names
[] =
109 "ProcessingInstruction",
122 if (nodetype
> _XmlNodeType_Last
)
123 return wine_dbg_sprintf("unknown type=%d", nodetype
);
125 return type_names
[nodetype
];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
130 static const char * const prop_names
[] =
142 if (prop
> _XmlReaderProperty_Last
)
143 return wine_dbg_sprintf("unknown property=%d", prop
);
145 return prop_names
[prop
];
148 struct xml_encoding_data
155 static const struct xml_encoding_data xml_encoding_map
[] = {
156 { utf16W
, XmlEncoding_UTF16
, ~0 },
157 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
160 const WCHAR
*get_encoding_name(xml_encoding encoding
)
162 return xml_encoding_map
[encoding
].name
;
165 xml_encoding
get_encoding_from_codepage(UINT codepage
)
168 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
170 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
172 return XmlEncoding_Unknown
;
179 unsigned int allocated
;
180 unsigned int written
;
183 typedef struct input_buffer input_buffer
;
187 IXmlReaderInput IXmlReaderInput_iface
;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
192 xml_encoding encoding
;
195 /* stream reference set after a SetInput() call from the reader;
196 it is stored as a sequential stream because the
197 optimizations possible with IStream aren't implemented */
198 ISequentialStream
*stream
;
199 input_buffer
*buffer
;
200 unsigned int pending
: 1;
203 static const struct IUnknownVtbl xmlreaderinputvtbl
;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
215 WCHAR
*str
; /* allocated null-terminated string */
216 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start
; /* input position where value starts */
220 static WCHAR emptyW
[] = {0};
221 static WCHAR xmlW
[] = {'x','m','l',0};
222 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
223 static const strval strval_empty
= { emptyW
};
224 static const strval strval_xml
= { xmlW
, 3 };
225 static const strval strval_xmlns
= { xmlnsW
, 5 };
248 struct element
*element
;
253 IXmlReader IXmlReader_iface
;
255 xmlreaderinput
*input
;
258 XmlReaderInternalState instate
;
259 XmlReaderResumeState resumestate
;
260 XmlNodeType nodetype
;
261 DtdProcessing dtdmode
;
262 IXmlResolver
*resolver
;
264 UINT line
, pos
; /* reader position in XML stream */
265 struct list attrs
; /* attributes list for current node */
266 struct attribute
*attr
; /* current attribute */
270 struct list elements
;
271 strval strvalues
[StringValue_Last
];
274 BOOL is_empty_element
;
275 struct element empty_element
;
276 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
281 encoded_buffer utf16
;
282 encoded_buffer encoded
;
284 xmlreaderinput
*input
;
287 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
289 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
292 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
294 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
297 /* reader memory allocation functions */
298 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
300 return m_alloc(reader
->imalloc
, len
);
303 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
305 void *ret
= reader_alloc(reader
, len
);
311 static inline void reader_free(xmlreader
*reader
, void *mem
)
313 m_free(reader
->imalloc
, mem
);
316 /* Just return pointer from offset, no attempt to read more. */
317 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
319 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
320 return (WCHAR
*)buffer
->data
+ offset
;
323 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
325 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
328 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
332 if (src
->str
!= strval_empty
.str
)
334 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
335 if (!dest
->str
) return E_OUTOFMEMORY
;
336 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
337 dest
->str
[dest
->len
] = 0;
344 /* reader input memory allocation functions */
345 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
347 return m_alloc(input
->imalloc
, len
);
350 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
352 return m_realloc(input
->imalloc
, mem
, len
);
355 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
357 m_free(input
->imalloc
, mem
);
360 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
367 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
368 ret
= readerinput_alloc(input
, size
);
369 if (ret
) memcpy(ret
, str
, size
);
375 static void reader_clear_attrs(xmlreader
*reader
)
377 struct attribute
*attr
, *attr2
;
378 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
380 reader_free(reader
, attr
);
382 list_init(&reader
->attrs
);
383 reader
->attr_count
= 0;
387 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
388 while we are on a node with attributes */
389 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*value
)
391 struct attribute
*attr
;
393 attr
= reader_alloc(reader
, sizeof(*attr
));
394 if (!attr
) return E_OUTOFMEMORY
;
397 attr
->prefix
= *prefix
;
399 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
400 attr
->localname
= *localname
;
401 attr
->value
= *value
;
402 list_add_tail(&reader
->attrs
, &attr
->entry
);
403 reader
->attr_count
++;
408 /* This one frees stored string value if needed */
409 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
411 if (v
->str
!= strval_empty
.str
)
413 reader_free(reader
, v
->str
);
418 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
425 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
427 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
430 /* used to initialize from constant string */
431 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
438 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
440 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
443 static void reader_free_strvalues(xmlreader
*reader
)
446 for (type
= 0; type
< StringValue_Last
; type
++)
447 reader_free_strvalue(reader
, type
);
450 /* This helper should only be used to test if strings are the same,
451 it doesn't try to sort. */
452 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
454 if (str1
->len
!= str2
->len
) return 0;
455 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
458 static void reader_clear_elements(xmlreader
*reader
)
460 struct element
*elem
, *elem2
;
461 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
463 reader_free_strvalued(reader
, &elem
->prefix
);
464 reader_free_strvalued(reader
, &elem
->localname
);
465 reader_free_strvalued(reader
, &elem
->qname
);
466 reader_free(reader
, elem
);
468 list_init(&reader
->elements
);
469 reader
->is_empty_element
= FALSE
;
472 static HRESULT
reader_inc_depth(xmlreader
*reader
)
474 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
478 static void reader_dec_depth(xmlreader
*reader
)
480 if (reader
->depth
> 1) reader
->depth
--;
483 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
488 ns
= reader_alloc(reader
, sizeof(*ns
));
489 if (!ns
) return E_OUTOFMEMORY
;
492 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
494 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
496 reader_free(reader
, ns
);
501 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
503 reader_free_strvalued(reader
, &ns
->prefix
);
504 reader_free(reader
, ns
);
509 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
513 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
515 reader_free_strvalued(reader
, &element
->prefix
);
516 reader_free_strvalued(reader
, &element
->localname
);
517 reader_free_strvalued(reader
, &element
->qname
);
518 reader_free(reader
, element
);
521 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
525 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
528 ns
->element
= element
;
531 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
534 ns
->element
= element
;
538 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
541 struct element
*element
;
544 if (!list_empty(&reader
->elements
))
546 hr
= reader_inc_depth(reader
);
551 element
= reader_alloc_zero(reader
, sizeof(*element
));
557 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) != S_OK
||
558 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) != S_OK
||
559 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) != S_OK
)
561 reader_free_element(reader
, element
);
565 list_add_head(&reader
->elements
, &element
->entry
);
566 reader_mark_ns_nodes(reader
, element
);
567 reader
->is_empty_element
= FALSE
;
570 reader_dec_depth(reader
);
574 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
578 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
579 if (ns
->element
!= element
)
582 list_remove(&ns
->entry
);
583 reader_free_strvalued(reader
, &ns
->prefix
);
584 reader_free_strvalued(reader
, &ns
->uri
);
585 reader_free(reader
, ns
);
588 if (!list_empty(&reader
->nsdef
)) {
589 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
590 if (ns
->element
== element
) {
591 list_remove(&ns
->entry
);
592 reader_free_strvalued(reader
, &ns
->prefix
);
593 reader_free_strvalued(reader
, &ns
->uri
);
594 reader_free(reader
, ns
);
599 static void reader_pop_element(xmlreader
*reader
)
601 struct element
*element
;
603 if (list_empty(&reader
->elements
))
606 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
607 list_remove(&element
->entry
);
609 reader_pop_ns_nodes(reader
, element
);
610 reader_free_element(reader
, element
);
611 reader_dec_depth(reader
);
613 /* It was a root element, the rest is expected as Misc */
614 if (list_empty(&reader
->elements
))
615 reader
->instate
= XmlReadInState_MiscEnd
;
618 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
619 means node value is to be determined. */
620 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
622 strval
*v
= &reader
->strvalues
[type
];
624 reader_free_strvalue(reader
, type
);
633 if (value
->str
== strval_empty
.str
)
637 if (type
== StringValue_Value
)
639 /* defer allocation for value string */
641 v
->start
= value
->start
;
646 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
647 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
648 v
->str
[value
->len
] = 0;
654 static inline int is_reader_pending(xmlreader
*reader
)
656 return reader
->input
->pending
;
659 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
661 const int initial_len
= 0x2000;
662 buffer
->data
= readerinput_alloc(input
, initial_len
);
663 if (!buffer
->data
) return E_OUTOFMEMORY
;
665 memset(buffer
->data
, 0, 4);
667 buffer
->allocated
= initial_len
;
673 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
675 readerinput_free(input
, buffer
->data
);
678 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
680 if (encoding
== XmlEncoding_Unknown
)
682 FIXME("unsupported encoding %d\n", encoding
);
686 *cp
= xml_encoding_map
[encoding
].cp
;
691 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
695 if (!name
) return XmlEncoding_Unknown
;
698 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
705 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
707 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
709 return xml_encoding_map
[n
].enc
;
717 return XmlEncoding_Unknown
;
720 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
722 input_buffer
*buffer
;
725 input
->buffer
= NULL
;
727 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
728 if (!buffer
) return E_OUTOFMEMORY
;
730 buffer
->input
= input
;
731 buffer
->code_page
= ~0; /* code page is unknown at this point */
732 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
734 readerinput_free(input
, buffer
);
738 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
740 free_encoded_buffer(input
, &buffer
->utf16
);
741 readerinput_free(input
, buffer
);
745 input
->buffer
= buffer
;
749 static void free_input_buffer(input_buffer
*buffer
)
751 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
752 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
753 readerinput_free(buffer
->input
, buffer
);
756 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
758 if (readerinput
->stream
) {
759 ISequentialStream_Release(readerinput
->stream
);
760 readerinput
->stream
= NULL
;
764 /* Queries already stored interface for IStream/ISequentialStream.
765 Interface supplied on creation will be overwritten */
766 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
770 readerinput_release_stream(readerinput
);
771 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
773 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
778 /* reads a chunk to raw buffer */
779 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
781 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
782 /* to make sure aligned length won't exceed allocated length */
783 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
787 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
788 variable width encodings like UTF-8 */
789 len
= (len
+ 3) & ~3;
790 /* try to use allocated space or grow */
791 if (buffer
->allocated
- buffer
->written
< len
)
793 buffer
->allocated
*= 2;
794 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
795 len
= buffer
->allocated
- buffer
->written
;
799 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
800 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
801 readerinput
->pending
= hr
== E_PENDING
;
802 if (FAILED(hr
)) return hr
;
803 buffer
->written
+= read
;
808 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
809 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
811 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
813 length
*= sizeof(WCHAR
);
814 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
815 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
817 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
818 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
819 buffer
->allocated
= grown_size
;
823 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
825 static const char startA
[] = {'<','?'};
826 static const char commentA
[] = {'<','!'};
827 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
828 unsigned char *ptr
= (unsigned char*)buffer
->data
;
830 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
831 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
832 /* test start byte */
835 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
836 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
837 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
838 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
842 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
844 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
845 static const char utf8bom
[] = {0xef,0xbb,0xbf};
846 static const char utf16lebom
[] = {0xff,0xfe};
849 *enc
= XmlEncoding_Unknown
;
851 if (buffer
->written
<= 3)
853 HRESULT hr
= readerinput_growraw(readerinput
);
854 if (FAILED(hr
)) return hr
;
855 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
858 ptrW
= (WCHAR
*)buffer
->data
;
859 /* try start symbols if we have enough data to do that, input buffer should contain
860 first chunk already */
861 if (readerinput_is_utf8(readerinput
))
862 *enc
= XmlEncoding_UTF8
;
863 else if (*ptrW
== '<')
866 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
867 *enc
= XmlEncoding_UTF16
;
869 /* try with BOM now */
870 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
872 buffer
->cur
+= sizeof(utf8bom
);
873 *enc
= XmlEncoding_UTF8
;
875 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
877 buffer
->cur
+= sizeof(utf16lebom
);
878 *enc
= XmlEncoding_UTF16
;
884 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
886 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
887 int len
= buffer
->written
;
889 /* complete single byte char */
890 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
892 /* find start byte of multibyte char */
893 while (--len
&& !(buffer
->data
[len
] & 0xc0))
899 /* Returns byte length of complete char sequence for buffer code page,
900 it's relative to current buffer position which is currently used for BOM handling
902 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
904 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
907 if (readerinput
->buffer
->code_page
== CP_UTF8
)
908 len
= readerinput_get_utf8_convlen(readerinput
);
910 len
= buffer
->written
;
912 TRACE("%d\n", len
- buffer
->cur
);
913 return len
- buffer
->cur
;
916 /* It's possible that raw buffer has some leftovers from last conversion - some char
917 sequence that doesn't represent a full code point. Length argument should be calculated with
918 readerinput_get_convlen(), if it's -1 it will be calculated here. */
919 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
921 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
924 len
= readerinput_get_convlen(readerinput
);
926 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
927 /* everything below cur is lost too */
928 buffer
->written
-= len
+ buffer
->cur
;
929 /* after this point we don't need cur offset really,
930 it's used only to mark where actual data begins when first chunk is read */
934 /* note that raw buffer content is kept */
935 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
937 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
938 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
944 hr
= get_code_page(enc
, &cp
);
945 if (FAILED(hr
)) return;
947 readerinput
->buffer
->code_page
= cp
;
948 len
= readerinput_get_convlen(readerinput
);
950 TRACE("switching to cp %d\n", cp
);
952 /* just copy in this case */
953 if (enc
== XmlEncoding_UTF16
)
955 readerinput_grow(readerinput
, len
);
956 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
957 dest
->written
+= len
*sizeof(WCHAR
);
961 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
962 readerinput_grow(readerinput
, dest_len
);
963 ptr
= (WCHAR
*)dest
->data
;
964 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
966 dest
->written
+= dest_len
*sizeof(WCHAR
);
969 /* shrinks parsed data a buffer begins with */
970 static void reader_shrink(xmlreader
*reader
)
972 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
974 /* avoid to move too often using threshold shrink length */
975 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
977 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
978 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
980 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
984 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
985 It won't attempt to shrink but will grow destination buffer if needed */
986 static HRESULT
reader_more(xmlreader
*reader
)
988 xmlreaderinput
*readerinput
= reader
->input
;
989 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
990 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
991 UINT cp
= readerinput
->buffer
->code_page
;
996 /* get some raw data from stream first */
997 hr
= readerinput_growraw(readerinput
);
998 len
= readerinput_get_convlen(readerinput
);
1000 /* just copy for UTF-16 case */
1003 readerinput_grow(readerinput
, len
);
1004 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1005 dest
->written
+= len
*sizeof(WCHAR
);
1009 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1010 readerinput_grow(readerinput
, dest_len
);
1011 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1012 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1014 dest
->written
+= dest_len
*sizeof(WCHAR
);
1015 /* get rid of processed data */
1016 readerinput_shrinkraw(readerinput
, len
);
1021 static inline UINT
reader_get_cur(xmlreader
*reader
)
1023 return reader
->input
->buffer
->utf16
.cur
;
1026 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1028 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1029 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1030 if (!*ptr
) reader_more(reader
);
1031 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1034 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1037 const WCHAR
*ptr
= reader_get_ptr(reader
);
1042 reader_more(reader
);
1043 ptr
= reader_get_ptr(reader
);
1045 if (str
[i
] != ptr
[i
])
1046 return ptr
[i
] - str
[i
];
1052 /* moves cursor n WCHARs forward */
1053 static void reader_skipn(xmlreader
*reader
, int n
)
1055 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1056 const WCHAR
*ptr
= reader_get_ptr(reader
);
1058 while (*ptr
++ && n
--)
1065 static inline BOOL
is_wchar_space(WCHAR ch
)
1067 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1070 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1071 static int reader_skipspaces(xmlreader
*reader
)
1073 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1074 const WCHAR
*ptr
= reader_get_ptr(reader
);
1075 UINT start
= reader_get_cur(reader
);
1077 while (is_wchar_space(*ptr
))
1081 else if (*ptr
== '\n')
1090 ptr
= reader_get_ptr(reader
);
1093 return reader_get_cur(reader
) - start
;
1096 /* [26] VersionNum ::= '1.' [0-9]+ */
1097 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1099 static const WCHAR onedotW
[] = {'1','.',0};
1103 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1105 start
= reader_get_cur(reader
);
1107 reader_skipn(reader
, 2);
1109 ptr2
= ptr
= reader_get_ptr(reader
);
1110 while (*ptr
>= '0' && *ptr
<= '9')
1112 reader_skipn(reader
, 1);
1113 ptr
= reader_get_ptr(reader
);
1116 if (ptr2
== ptr
) return WC_E_DIGIT
;
1117 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1118 TRACE("version=%s\n", debug_strval(reader
, val
));
1122 /* [25] Eq ::= S? '=' S? */
1123 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1125 static const WCHAR eqW
[] = {'=',0};
1126 reader_skipspaces(reader
);
1127 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1129 reader_skipn(reader
, 1);
1130 reader_skipspaces(reader
);
1134 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1135 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1137 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1141 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1143 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1144 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1145 /* skip 'version' */
1146 reader_skipn(reader
, 7);
1148 hr
= reader_parse_eq(reader
);
1149 if (FAILED(hr
)) return hr
;
1151 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1154 reader_skipn(reader
, 1);
1156 hr
= reader_parse_versionnum(reader
, &val
);
1157 if (FAILED(hr
)) return hr
;
1159 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1163 reader_skipn(reader
, 1);
1165 return reader_add_attr(reader
, NULL
, &name
, &val
);
1168 /* ([A-Za-z0-9._] | '-') */
1169 static inline BOOL
is_wchar_encname(WCHAR ch
)
1171 return ((ch
>= 'A' && ch
<= 'Z') ||
1172 (ch
>= 'a' && ch
<= 'z') ||
1173 (ch
>= '0' && ch
<= '9') ||
1174 (ch
== '.') || (ch
== '_') ||
1178 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1179 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1181 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1185 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1186 return WC_E_ENCNAME
;
1188 val
->start
= reader_get_cur(reader
);
1191 while (is_wchar_encname(*++ptr
))
1195 enc
= parse_encoding_name(start
, len
);
1196 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1200 if (enc
== XmlEncoding_Unknown
)
1201 return WC_E_ENCNAME
;
1203 /* skip encoding name */
1204 reader_skipn(reader
, len
);
1208 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1209 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1211 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1215 if (!reader_skipspaces(reader
)) return S_FALSE
;
1217 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1218 name
.str
= reader_get_ptr(reader
);
1219 name
.start
= reader_get_cur(reader
);
1221 /* skip 'encoding' */
1222 reader_skipn(reader
, 8);
1224 hr
= reader_parse_eq(reader
);
1225 if (FAILED(hr
)) return hr
;
1227 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1230 reader_skipn(reader
, 1);
1232 hr
= reader_parse_encname(reader
, &val
);
1233 if (FAILED(hr
)) return hr
;
1235 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1239 reader_skipn(reader
, 1);
1241 return reader_add_attr(reader
, NULL
, &name
, &val
);
1244 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1245 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1247 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1248 static const WCHAR yesW
[] = {'y','e','s',0};
1249 static const WCHAR noW
[] = {'n','o',0};
1254 if (!reader_skipspaces(reader
)) return S_FALSE
;
1256 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1257 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1258 /* skip 'standalone' */
1259 reader_skipn(reader
, 10);
1261 hr
= reader_parse_eq(reader
);
1262 if (FAILED(hr
)) return hr
;
1264 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1267 reader_skipn(reader
, 1);
1269 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1270 return WC_E_XMLDECL
;
1272 start
= reader_get_cur(reader
);
1273 /* skip 'yes'|'no' */
1274 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1275 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1276 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1278 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1281 reader_skipn(reader
, 1);
1283 return reader_add_attr(reader
, NULL
, &name
, &val
);
1286 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1287 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1289 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1290 static const WCHAR declcloseW
[] = {'?','>',0};
1293 /* check if we have "<?xml " */
1294 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1296 reader_skipn(reader
, 5);
1297 hr
= reader_parse_versioninfo(reader
);
1301 hr
= reader_parse_encdecl(reader
);
1305 hr
= reader_parse_sddecl(reader
);
1309 reader_skipspaces(reader
);
1310 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1311 reader_skipn(reader
, 2);
1313 reader_inc_depth(reader
);
1314 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1315 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1316 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1317 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1322 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
/* Parses a comment node and can be re-entered after the input ran dry.
   When resumestate is XmlReadResumeState_Comment the body start saved in
   resume[XmlReadResume_Body] is reused.  Otherwise the 4 character opening
   markup is skipped, the buffer is shrunk, the node type is set to
   XmlNodeType_Comment, the body start and the resume state are recorded and
   NULL is passed for the three string values.
   When the body is complete the value covers body start up to the current
   position, the closing markup is skipped, both names become the empty
   string and the resume data is cleared.  WC_E_COMMENT is returned from an
   error branch whose condition is not part of this copy of the file. */
1323 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1328 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1330 start
= reader
->resume
[XmlReadResume_Body
];
1331 ptr
= reader_get_ptr(reader
);
1336 reader_skipn(reader
, 4);
1337 reader_shrink(reader
);
1338 ptr
= reader_get_ptr(reader
);
1339 start
= reader_get_cur(reader
);
1340 reader
->nodetype
= XmlNodeType_Comment
;
1341 reader
->resume
[XmlReadResume_Body
] = start
;
1342 reader
->resumestate
= XmlReadResumeState_Comment
;
1343 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1344 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1345 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1348 /* will exit when there's no more data, it won't attempt to
1349 read more from stream */
1360 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1361 TRACE("%s\n", debug_strval(reader
, &value
));
1363 /* skip the 3 characters of closing markup */
1364 reader_skipn(reader
, 3);
1366 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1367 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1368 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1369 reader
->resume
[XmlReadResume_Body
] = 0;
1370 reader
->resumestate
= XmlReadResumeState_Initial
;
1374 return WC_E_COMMENT
;
1378 reader_skipn(reader
, 1);
1385 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1386 static inline BOOL
is_char(WCHAR ch
)
1388 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1389 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1390 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1391 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1392 (ch
>= 0xe000 && ch
<= 0xfffd);
1395 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1396 static inline BOOL
is_pubchar(WCHAR ch
)
1398 return (ch
== ' ') ||
1399 (ch
>= 'a' && ch
<= 'z') ||
1400 (ch
>= 'A' && ch
<= 'Z') ||
1401 (ch
>= '0' && ch
<= '9') ||
1402 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1403 (ch
== '=') || (ch
== '?') ||
1404 (ch
== '@') || (ch
== '!') ||
1405 (ch
>= '#' && ch
<= '%') || /* #$% */
1406 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1409 static inline BOOL
is_namestartchar(WCHAR ch
)
1411 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1412 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1413 (ch
>= 0xc0 && ch
<= 0xd6) ||
1414 (ch
>= 0xd8 && ch
<= 0xf6) ||
1415 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1416 (ch
>= 0x370 && ch
<= 0x37d) ||
1417 (ch
>= 0x37f && ch
<= 0x1fff) ||
1418 (ch
>= 0x200c && ch
<= 0x200d) ||
1419 (ch
>= 0x2070 && ch
<= 0x218f) ||
1420 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1421 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1422 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1423 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1424 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1425 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1428 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1429 static inline BOOL
is_ncnamechar(WCHAR ch
)
1431 return (ch
>= 'A' && ch
<= 'Z') ||
1432 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1433 (ch
== '-') || (ch
== '.') ||
1434 (ch
>= '0' && ch
<= '9') ||
1436 (ch
>= 0xc0 && ch
<= 0xd6) ||
1437 (ch
>= 0xd8 && ch
<= 0xf6) ||
1438 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1439 (ch
>= 0x300 && ch
<= 0x36f) ||
1440 (ch
>= 0x370 && ch
<= 0x37d) ||
1441 (ch
>= 0x37f && ch
<= 0x1fff) ||
1442 (ch
>= 0x200c && ch
<= 0x200d) ||
1443 (ch
>= 0x203f && ch
<= 0x2040) ||
1444 (ch
>= 0x2070 && ch
<= 0x218f) ||
1445 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1446 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1447 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1448 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1449 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1450 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1453 static inline BOOL
is_namechar(WCHAR ch
)
1455 return (ch
== ':') || is_ncnamechar(ch
);
1458 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1460 /* When we're on attribute always return attribute type, container node type is kept.
1461 Note that container is not necessarily an element, and attribute doesn't mean it's
1462 an attribute in XML spec terms. */
1463 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1466 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1467 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1468 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1469 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1470 [5] Name ::= NameStartChar (NameChar)* */
/* Reads a Name into *name.
   A non-zero resume[XmlReadResume_Name] is used as the start offset of a
   name whose parsing was interrupted earlier.  Otherwise the name starts at
   the current position and WC_E_NAMECHARACTER is returned if the first
   character fails is_namestartchar().  Characters are consumed while
   is_namechar() holds.  If the reader is pending afterwards the start
   offset is saved for the next call, else the slot is cleared and *name is
   set to the span from start to the current position. */
1471 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1476 if (reader
->resume
[XmlReadResume_Name
])
1478 start
= reader
->resume
[XmlReadResume_Name
];
1479 ptr
= reader_get_ptr(reader
);
1483 ptr
= reader_get_ptr(reader
);
1484 start
= reader_get_cur(reader
);
1485 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1488 while (is_namechar(*ptr
))
1490 reader_skipn(reader
, 1);
1491 ptr
= reader_get_ptr(reader
);
1494 if (is_reader_pending(reader
))
1496 reader
->resume
[XmlReadResume_Name
] = start
;
1500 reader
->resume
[XmlReadResume_Name
] = 0;
1502 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1503 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1508 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
/* Reads the target of a processing instruction.
   A failure of reader_parse_name() is reported as E_PENDING when the reader
   is pending and as WC_E_PI otherwise.  A name that strval_eq() finds equal
   to xmlval is rejected with WC_E_LEADINGXML.  The characters of the name
   are then scanned; the scan can fail with WC_E_PI at index 0 and with
   NC_E_NAMECOLON at any later index.
   NOTE(review): xmlval holds the lower case spelling only while [17]
   excludes every case combination - confirm how strval_eq() compares. */
1509 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1511 static const WCHAR xmlW
[] = {'x','m','l'};
1512 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1518 hr
= reader_parse_name(reader
, &name
);
1519 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1521 /* now that we got name check for illegal content */
1522 if (strval_eq(reader
, &name
, &xmlval
))
1523 return WC_E_LEADINGXML
;
1525 /* PITarget can't be a qualified name */
1526 ptr
= reader_get_strptr(reader
, &name
);
1527 for (i
= 0; i
< name
.len
; i
++)
1529 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1531 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1536 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
/* Parses a processing instruction.  The work is keyed on
   reader->resumestate so that it can continue after an interruption.
   Initial:  skips the 2 character opening markup, shrinks the buffer and
             switches to the PITarget state.
   PITarget: parses the target, stores it as local and qualified name with
             an empty value, switches to the PIBody state and records the
             current position as body start.
   Body:     the value is the span from the body start, moved past leading
             whitespace, up to the position captured in cur.  The 2
             character closing markup is skipped, the node type becomes
             XmlNodeType_ProcessingInstruction and the resume data is
             cleared. */
1537 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1544 switch (reader
->resumestate
)
1546 case XmlReadResumeState_Initial
:
1548 reader_skipn(reader
, 2);
1549 reader_shrink(reader
);
1550 reader
->resumestate
= XmlReadResumeState_PITarget
;
/* no break here, parsing carries on with the target */
1551 case XmlReadResumeState_PITarget
:
1552 hr
= reader_parse_pitarget(reader
, &target
);
1553 if (FAILED(hr
)) return hr
;
1554 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1555 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1556 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1557 reader
->resumestate
= XmlReadResumeState_PIBody
;
1558 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1563 start
= reader
->resume
[XmlReadResume_Body
];
1564 ptr
= reader_get_ptr(reader
);
/* cur marks the end of the body, taken before the closing markup is skipped */
1571 UINT cur
= reader_get_cur(reader
);
1574 /* strip all leading whitespace chars */
1577 ptr
= reader_get_ptr2(reader
, start
);
1578 if (!is_wchar_space(*ptr
)) break;
1582 reader_init_strvalue(start
, cur
-start
, &value
);
1585 reader_skipn(reader
, 2);
1586 TRACE("%s\n", debug_strval(reader
, &value
));
1587 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1588 reader
->resumestate
= XmlReadResumeState_Initial
;
1589 reader
->resume
[XmlReadResume_Body
] = 0;
1590 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1595 reader_skipn(reader
, 1);
1596 ptr
= reader_get_ptr(reader
);
1602 /* This one is used to parse significant whitespace nodes, like in Misc production */
/* Keyed on reader->resumestate:
   Initial:    shrinks the buffer, switches to the Whitespace resume state,
               records the current position as body start and sets the node
               type to XmlNodeType_Whitespace with empty names and value.
   Whitespace: skips spaces and returns S_OK while the reader is pending.
               Otherwise the value is set to the span from the recorded
               start to the current position and the resume state goes back
               to Initial. */
1603 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1605 switch (reader
->resumestate
)
1607 case XmlReadResumeState_Initial
:
1608 reader_shrink(reader
);
1609 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1610 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1611 reader
->nodetype
= XmlNodeType_Whitespace
;
1612 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1613 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1614 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1616 case XmlReadResumeState_Whitespace
:
1621 reader_skipspaces(reader
);
/* more whitespace may still arrive, keep the node open */
1622 if (is_reader_pending(reader
)) return S_OK
;
1624 start
= reader
->resume
[XmlReadResume_Body
];
1625 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1626 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1627 TRACE("%s\n", debug_strval(reader
, &value
));
1628 reader
->resumestate
= XmlReadResumeState_Initial
;
1637 /* [27] Misc ::= Comment | PI | S */
/* Parses one Misc item; hr starts out as S_FALSE.
   With a resume state other than Initial more input is requested through
   reader_more() and the unfinished processing instruction, comment or
   whitespace node is continued.  Any other resume state is logged with ERR.
   Otherwise the item is picked from the input: a whitespace character, the
   commentW prefix or the piW prefix.  A result other than S_FALSE is
   returned to the caller. */
1638 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1640 HRESULT hr
= S_FALSE
;
1642 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1644 hr
= reader_more(reader
);
1645 if (FAILED(hr
)) return hr
;
1647 /* finish current node */
1648 switch (reader
->resumestate
)
1650 case XmlReadResumeState_PITarget
:
1651 case XmlReadResumeState_PIBody
:
1652 return reader_parse_pi(reader
);
1653 case XmlReadResumeState_Comment
:
1654 return reader_parse_comment(reader
);
1655 case XmlReadResumeState_Whitespace
:
1656 return reader_parse_whitespace(reader
);
1658 ERR("unknown resume state %d\n", reader
->resumestate
);
1664 const WCHAR
*cur
= reader_get_ptr(reader
);
1666 if (is_wchar_space(*cur
))
1667 hr
= reader_parse_whitespace(reader
);
1668 else if (!reader_cmp(reader
, commentW
))
1669 hr
= reader_parse_comment(reader
);
1670 else if (!reader_cmp(reader
, piW
))
1671 hr
= reader_parse_pi(reader
);
1675 if (hr
!= S_FALSE
) return hr
;
1681 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
/* Reads a quoted system literal into *literal.
   Returns WC_E_QUOTE unless the current character is a double or a single
   quote.  After the opening quote is skipped characters are consumed while
   they satisfy is_char() and differ from quote.  *literal covers the
   consumed span; a closing quote, when present, is skipped.
   NOTE(review): no error path is visible for a literal that stops at a
   character other than the closing quote - confirm this is intended. */
1682 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1684 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1687 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1690 reader_skipn(reader
, 1);
1692 cur
= reader_get_ptr(reader
);
1693 start
= reader_get_cur(reader
);
1694 while (is_char(*cur
) && *cur
!= quote
)
1696 reader_skipn(reader
, 1);
1697 cur
= reader_get_ptr(reader
);
1699 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1700 if (*cur
== quote
) reader_skipn(reader
, 1);
1702 TRACE("%s\n", debug_strval(reader
, literal
));
1706 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1707 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/* Reads a quoted public identifier literal into *literal.
   Returns WC_E_QUOTE unless the current character is a double or a single
   quote.  After the opening quote is skipped characters are consumed while
   they satisfy is_pubchar() and differ from quote.  *literal covers the
   consumed span; a closing quote, when present, is skipped. */
1708 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1710 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1713 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1716 reader_skipn(reader
, 1);
1718 start
= reader_get_cur(reader
);
1719 cur
= reader_get_ptr(reader
);
1720 while (is_pubchar(*cur
) && *cur
!= quote
)
1722 reader_skipn(reader
, 1);
1723 cur
= reader_get_ptr(reader
);
1725 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1726 if (*cur
== quote
) reader_skipn(reader
, 1);
1728 TRACE("%s\n", debug_strval(reader
, literal
));
1732 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
/* Parses an external identifier and stores its parts as attributes.
   PUBLIC form: skips the 6 character keyword, requires whitespace
   (WC_E_WHITESPACE otherwise), reads the public literal and adds it as an
   attribute named publicW.  If no whitespace follows S_OK is returned.
   Otherwise a system literal is attempted; when reading it fails S_OK is
   returned as well, when it succeeds it is added under the name systemW.
   SYSTEM form: skips the 6 character keyword, requires whitespace, reads
   the system literal and adds it as an attribute named systemW. */
1733 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1735 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1736 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1741 if (!reader_cmp(reader
, publicW
)) {
1745 reader_skipn(reader
, 6);
1746 cnt
= reader_skipspaces(reader
);
1747 if (!cnt
) return WC_E_WHITESPACE
;
1749 hr
= reader_parse_pub_literal(reader
, &pub
);
1750 if (FAILED(hr
)) return hr
;
1752 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1753 hr
= reader_add_attr(reader
, NULL
, &name
, &pub
);
1754 if (FAILED(hr
)) return hr
;
1756 cnt
= reader_skipspaces(reader
);
1757 if (!cnt
) return S_OK
;
1759 /* optional system id */
1760 hr
= reader_parse_sys_literal(reader
, &sys
);
/* the system id is optional here, so its absence is not an error */
1761 if (FAILED(hr
)) return S_OK
;
1763 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1764 hr
= reader_add_attr(reader
, NULL
, &name
, &sys
);
1765 if (FAILED(hr
)) return hr
;
1768 } else if (!reader_cmp(reader
, systemW
)) {
1770 reader_skipn(reader
, 6);
1771 cnt
= reader_skipspaces(reader
);
1772 if (!cnt
) return WC_E_WHITESPACE
;
1774 hr
= reader_parse_sys_literal(reader
, &sys
);
1775 if (FAILED(hr
)) return hr
;
1777 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1778 return reader_add_attr(reader
, NULL
, &name
, &sys
);
1784 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
/* Parses a document type declaration.
   Returns S_FALSE when the input does not start with doctypeW and
   WC_E_DTDPROHIBITED when reader->dtdmode is DtdProcessing_Prohibit.
   Otherwise the 9 character keyword is skipped, whitespace is required
   (WC_E_WHITESPACE), the name is read (any failure is reported as
   WC_E_DECLDOCTYPE) and the external identifier is parsed.  Internal subset
   parsing is not implemented and only reported with FIXME.  At the end the
   node type is XmlNodeType_DocumentType and the name is stored as both
   local and qualified name. */
1785 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1787 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1792 /* check if we have "<!DOCTYPE" */
1793 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1794 reader_shrink(reader
);
1796 /* DTD processing is not allowed by default */
1797 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1799 reader_skipn(reader
, 9);
1800 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1803 hr
= reader_parse_name(reader
, &name
);
1804 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1806 reader_skipspaces(reader
);
1808 hr
= reader_parse_externalid(reader
);
1809 if (FAILED(hr
)) return hr
;
1811 reader_skipspaces(reader
);
1813 cur
= reader_get_ptr(reader
);
1816 FIXME("internal subset parsing not implemented\n");
1821 reader_skipn(reader
, 1);
1823 reader
->nodetype
= XmlNodeType_DocumentType
;
1824 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1825 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1830 /* [11 NS] LocalPart ::= NCName */
/* Reads the local part of a qualified name into *local.
   A non-zero resume[XmlReadResume_Local] is used as the start offset of an
   interrupted parse, otherwise the current position is the start.
   Characters are consumed while is_ncnamechar() holds.  If the reader is
   pending afterwards the start offset is saved for the next call, else the
   slot is cleared and *local is set to the span from start to the current
   position. */
1831 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1836 if (reader
->resume
[XmlReadResume_Local
])
1838 start
= reader
->resume
[XmlReadResume_Local
];
1839 ptr
= reader_get_ptr(reader
);
1843 ptr
= reader_get_ptr(reader
);
1844 start
= reader_get_cur(reader
);
1847 while (is_ncnamechar(*ptr
))
1849 reader_skipn(reader
, 1);
1850 ptr
= reader_get_ptr(reader
);
1853 if (is_reader_pending(reader
))
1855 reader
->resume
[XmlReadResume_Local
] = start
;
1859 reader
->resume
[XmlReadResume_Local
] = 0;
1861 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1866 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1867 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1868 [9 NS] UnprefixedName ::= LocalPart
1869 [10 NS] Prefix ::= NCName */
/* Reads a qualified name and fills *prefix, *local and *qname.
   The start offset of the whole name is kept in resume[XmlReadResume_Name];
   on a fresh parse NC_E_QNAMECHARACTER is returned if the first character
   fails is_ncnamechar().
   If resume[XmlReadResume_Local] is set the prefix was already read: the
   local part is continued and the prefix is rebuilt as the span that ends
   one character before local->start.
   Otherwise the prefix part is consumed (E_PENDING if the reader is
   pending), and either a prefix plus a local part is read, or the whole
   span becomes the local part with an empty prefix.
   *qname starts at the prefix when there is one, at the local part
   otherwise, and its length adds 1 to the prefix length for the separator.
   Both resume slots are cleared at the end. */
1870 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1876 if (reader
->resume
[XmlReadResume_Name
])
1878 start
= reader
->resume
[XmlReadResume_Name
];
1879 ptr
= reader_get_ptr(reader
);
1883 ptr
= reader_get_ptr(reader
);
1884 start
= reader_get_cur(reader
);
1885 reader
->resume
[XmlReadResume_Name
] = start
;
1886 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1889 if (reader
->resume
[XmlReadResume_Local
])
1891 hr
= reader_parse_local(reader
, local
);
1892 if (FAILED(hr
)) return hr
;
1894 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1895 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1900 /* skip prefix part */
1901 while (is_ncnamechar(*ptr
))
1903 reader_skipn(reader
, 1);
1904 ptr
= reader_get_ptr(reader
);
1907 if (is_reader_pending(reader
)) return E_PENDING
;
1909 /* got a qualified name */
1912 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1915 reader_skipn(reader
, 1);
1916 hr
= reader_parse_local(reader
, local
);
1917 if (FAILED(hr
)) return hr
;
1921 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1922 reader_init_strvalue(0, 0, prefix
);
1927 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1929 TRACE("ncname %s\n", debug_strval(reader
, local
));
1931 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1933 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1936 reader
->resume
[XmlReadResume_Name
] = 0;
1937 reader
->resume
[XmlReadResume_Local
] = 0;
1942 /* Applies normalization rules to a single char, used for attribute values.
1944 Rules include 2 steps:
1946 1) replacing \r\n with a single \n;
1947 2) replacing all whitespace chars with ' '.
1950 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1952 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1954 if (!is_wchar_space(*ptr
)) return;
1956 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1958 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1959 memmove(ptr
+1, ptr
+2, len
);
1964 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1966 static const WCHAR entltW
[] = {'l','t'};
1967 static const WCHAR entgtW
[] = {'g','t'};
1968 static const WCHAR entampW
[] = {'a','m','p'};
1969 static const WCHAR entaposW
[] = {'a','p','o','s'};
1970 static const WCHAR entquotW
[] = {'q','u','o','t'};
1971 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1972 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1973 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1974 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1975 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1976 WCHAR
*str
= reader_get_strptr(reader
, name
);
1981 if (strval_eq(reader
, name
, <
)) return '<';
1984 if (strval_eq(reader
, name
, >
)) return '>';
1987 if (strval_eq(reader
, name
, &
))
1989 else if (strval_eq(reader
, name
, &apos
))
1993 if (strval_eq(reader
, name
, "
)) return '\"';
2002 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2003 [67] Reference ::= EntityRef | CharRef
2004 [68] EntityRef ::= '&' Name ';' */
/* Parses a reference and replaces it in place, inside the UTF-16 input
   buffer, with the single character it stands for.
   Character references: digits are accumulated into ch with base 16 or base
   10.  A character that is not a digit of the base ends the number and is
   reported as WC_E_SEMICOLON when ch is already non-zero, else as
   WC_E_HEXDIGIT or WC_E_DIGIT.  A value rejected by is_char() yields
   WC_E_XMLCHARACTER and a whitespace value is turned into a space.  The
   tail of the buffer is then moved up behind the start of the reference,
   buffer->written is reduced by the consumed length and buffer->cur is set
   to one past the start.
   Entity references: a name is parsed, the following character has to be a
   semicolon (WC_E_SEMICOLON) and the name is looked up with
   get_predefined_entity(); the buffer is collapsed the same way.  The
   unresolved case is logged with FIXME and returns WC_E_UNDECLAREDENTITY.
   NOTE(review): no range check on ch is visible before is_char(ch) and the
   declaration of ch is not part of this copy - confirm large numeric
   references cannot wrap around into a valid character.
   NOTE(review): the character reference path computes len without
   subtracting sizeof(WCHAR) while the entity path does, although both copy
   from ptr + 1 - confirm the first memmove does not read past the data. */
2005 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2007 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2008 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2009 UINT cur
= reader_get_cur(reader
);
2014 reader_skipn(reader
, 1);
2015 ptr
= reader_get_ptr(reader
);
2019 reader_skipn(reader
, 1);
2020 ptr
= reader_get_ptr(reader
);
2022 /* hex char or decimal */
2025 reader_skipn(reader
, 1);
2026 ptr
= reader_get_ptr(reader
);
2030 if ((*ptr
>= '0' && *ptr
<= '9'))
2031 ch
= ch
*16 + *ptr
- '0';
2032 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2033 ch
= ch
*16 + *ptr
- 'a' + 10;
2034 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2035 ch
= ch
*16 + *ptr
- 'A' + 10;
2037 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2038 reader_skipn(reader
, 1);
2039 ptr
= reader_get_ptr(reader
);
2046 if ((*ptr
>= '0' && *ptr
<= '9'))
2048 ch
= ch
*10 + *ptr
- '0';
2049 reader_skipn(reader
, 1);
2050 ptr
= reader_get_ptr(reader
);
2053 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2057 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2060 if (is_wchar_space(ch
)) ch
= ' ';
2062 ptr
= reader_get_ptr(reader
);
2063 start
= reader_get_ptr2(reader
, cur
);
2064 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2065 memmove(start
+ 1, ptr
+ 1, len
);
2067 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2068 buffer
->cur
= cur
+ 1;
2077 hr
= reader_parse_name(reader
, &name
);
2078 if (FAILED(hr
)) return hr
;
2080 ptr
= reader_get_ptr(reader
);
2081 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2083 /* predefined entities resolve to a single character */
2084 ch
= get_predefined_entity(reader
, &name
);
2087 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2088 memmove(start
+1, ptr
+1, len
);
2089 buffer
->cur
= cur
+ 1;
2095 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2096 return WC_E_UNDECLAREDENTITY
;
2104 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
/* Reads a quoted attribute value into *value.
   Returns WC_E_QUOTE unless quote is a double or a single quote and
   WC_E_LESSTHAN when a less-than sign shows up inside the value.  At the end
   of the value *value is set to the span between the quotes and the closing
   quote is skipped.  References are resolved through
   reader_parse_reference(), whose failure is passed on; other characters go
   through reader_normalize_space() before they are skipped. */
2105 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2110 ptr
= reader_get_ptr(reader
);
2112 /* skip opening quote */
2114 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2115 reader_skipn(reader
, 1);
2117 ptr
= reader_get_ptr(reader
);
2118 start
= reader_get_cur(reader
);
2121 if (*ptr
== '<') return WC_E_LESSTHAN
;
2125 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2126 /* skip closing quote */
2127 reader_skipn(reader
, 1);
2133 HRESULT hr
= reader_parse_reference(reader
);
2134 if (FAILED(hr
)) return hr
;
2138 reader_normalize_space(reader
, ptr
);
2139 reader_skipn(reader
, 1);
2141 ptr
= reader_get_ptr(reader
);
2147 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2148 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2149 [3 NS] DefaultAttName ::= 'xmlns'
2150 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
/* Parses one attribute and adds it to the reader.
   The qualified name is read first.  Its prefix and the whole name are each
   compared against strval_xmlns; the ns and nsdef flags, both FALSE at the
   start, presumably record the outcome (the assignments are not part of
   this copy of the file).  Then the equals sign and the value are parsed.
   reader_push_ns() receives strval_xmlns as prefix when nsdef is set and
   the local name otherwise.  The attribute itself is added with its prefix,
   local name and value.  Any failed step returns its HRESULT. */
2151 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2153 strval prefix
, local
, qname
, value
;
2154 BOOL ns
= FALSE
, nsdef
= FALSE
;
2157 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2158 if (FAILED(hr
)) return hr
;
2160 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2163 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2166 hr
= reader_parse_eq(reader
);
2167 if (FAILED(hr
)) return hr
;
2169 hr
= reader_parse_attvalue(reader
, &value
);
2170 if (FAILED(hr
)) return hr
;
2173 reader_push_ns(reader
, nsdef
? &strval_xmlns
: &local
, &value
, nsdef
);
2175 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2176 return reader_add_attr(reader
, &prefix
, &local
, &value
);
2179 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2180 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
/* Parses a start tag or an empty element tag after its opening bracket.
   The element name is read into *prefix, *local and *qname.  After
   whitespace three cases are handled:
   - endW follows: *empty is set, the 2 characters are skipped, the reader
     is flagged as being on an empty element, the three name parts are
     copied into reader->empty_element and reader_mark_ns_nodes() is called
     for it;
   - gtW follows: it is skipped and the result of reader_push_element() is
     returned;
   - otherwise an attribute is parsed, failure is passed on. */
2181 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2185 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2186 if (FAILED(hr
)) return hr
;
2190 static const WCHAR endW
[] = {'/','>',0};
2192 reader_skipspaces(reader
);
2195 if ((*empty
= !reader_cmp(reader
, endW
)))
2198 reader_skipn(reader
, 2);
2199 reader
->is_empty_element
= TRUE
;
2200 reader
->empty_element
.prefix
= *prefix
;
2201 reader
->empty_element
.localname
= *local
;
2202 reader
->empty_element
.qname
= *qname
;
2203 reader_mark_ns_nodes(reader
, &reader
->empty_element
);
2207 /* got a start tag */
2208 if (!reader_cmp(reader
, gtW
))
2211 reader_skipn(reader
, 1);
2212 return reader_push_element(reader
, prefix
, local
, qname
);
2215 hr
= reader_parse_attribute(reader
);
2216 if (FAILED(hr
)) return hr
;
2222 /* [39] element ::= EmptyElemTag | STag content ETag */
/* Parses the opening tag of an element, keyed on reader->resumestate.
   Initial: returns S_FALSE when the input does not start with ltW,
            otherwise skips it, shrinks the buffer and switches to the STag
            state.
   STag:    parses the tag through reader_parse_stag(), failure is passed
            on.  An empty element with an empty element stack moves the
            reader to XmlReadInState_MiscEnd; XmlReadInState_Content is the
            other target state.  The node type becomes XmlNodeType_Element,
            the resume state returns to Initial and prefix, local name and
            qualified name are stored. */
2223 static HRESULT
reader_parse_element(xmlreader
*reader
)
2227 switch (reader
->resumestate
)
2229 case XmlReadResumeState_Initial
:
2230 /* check if we are really on element */
2231 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2234 reader_skipn(reader
, 1);
2236 reader_shrink(reader
);
2237 reader
->resumestate
= XmlReadResumeState_STag
;
2238 case XmlReadResumeState_STag
:
2240 strval qname
, prefix
, local
;
2243 /* this handles empty elements too */
2244 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2245 if (FAILED(hr
)) return hr
;
2247 /* FIXME: need to check for defined namespace to reject invalid prefix */
2249 /* if we got empty element and stack is empty go straight to Misc */
2250 if (empty
&& list_empty(&reader
->elements
))
2251 reader
->instate
= XmlReadInState_MiscEnd
;
2253 reader
->instate
= XmlReadInState_Content
;
2255 reader
->nodetype
= XmlNodeType_Element
;
2256 reader
->resumestate
= XmlReadResumeState_Initial
;
2257 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2258 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2259 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2269 /* [13 NS] ETag ::= '</' QName S? '>' */
/* Parses an end tag.
   Skips the 2 character opening markup, reads the qualified name, skips
   whitespace and requires gtW (WC_E_GREATERTHAN otherwise).  The name is
   then compared with the qualified name of the element at the head of
   reader->elements; a mismatch returns WC_E_ELEMENTMATCH.  On a match the
   node type becomes XmlNodeType_EndElement and prefix, local name and
   qualified name are stored. */
2270 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2272 strval prefix
, local
, qname
;
2273 struct element
*elem
;
2277 reader_skipn(reader
, 2);
2279 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2280 if (FAILED(hr
)) return hr
;
2282 reader_skipspaces(reader
);
2284 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2287 reader_skipn(reader
, 1);
2289 /* Element stack should never be empty at this point, cause we shouldn't get to
2290 content parsing if it's empty. */
2291 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2292 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2294 reader
->nodetype
= XmlNodeType_EndElement
;
2295 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2296 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2297 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2302 /* [18] CDSect ::= CDStart CData CDEnd
2303 [19] CDStart ::= '<![CDATA['
2304 [20] CData ::= (Char* - (Char* ']]>' Char*))
2305 [21] CDEnd ::= ']]>' */
/* Parses a CDATA section and can be re-entered after the input ran dry.
   When resumestate is XmlReadResumeState_CDATA the body start saved in
   resume[XmlReadResume_Body] is reused.  Otherwise the 9 character opening
   markup is skipped, the buffer is shrunk, the node type is set to
   XmlNodeType_CDATA, the body start and the resume state are recorded and
   NULL is passed for the three string values.
   When the three character end marker is found the value covers body start
   up to the current position, the marker is skipped, both names become the
   empty string and the resume data is cleared. */
2306 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2311 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2313 start
= reader
->resume
[XmlReadResume_Body
];
2314 ptr
= reader_get_ptr(reader
);
2318 /* skip markup '<![CDATA[' */
2319 reader_skipn(reader
, 9);
2320 reader_shrink(reader
);
2321 ptr
= reader_get_ptr(reader
);
2322 start
= reader_get_cur(reader
);
2323 reader
->nodetype
= XmlNodeType_CDATA
;
2324 reader
->resume
[XmlReadResume_Body
] = start
;
2325 reader
->resumestate
= XmlReadResumeState_CDATA
;
2326 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2327 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2328 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2333 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2337 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2340 reader_skipn(reader
, 3);
2341 TRACE("%s\n", debug_strval(reader
, &value
));
2343 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2344 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2345 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2346 reader
->resume
[XmlReadResume_Body
] = 0;
2347 reader
->resumestate
= XmlReadResumeState_Initial
;
2352 /* Value normalization is not fully implemented, rules are:
2354 - single '\r' -> '\n';
2355 - sequence '\r\n' -> '\n', in this case value length changes;
2357 if (*ptr
== '\r') *ptr
= '\n';
2358 reader_skipn(reader
, 1);
2366 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
/* Parses character data and can be re-entered after the input ran dry.
   When resumestate is XmlReadResumeState_CharData the body start saved in
   resume[XmlReadResume_Body] is reused.  Otherwise the buffer is shrunk and
   S_OK is returned right away if the input is at its terminator or at a
   less-than sign.  The node type starts as XmlNodeType_Whitespace when the
   first character is whitespace and as XmlNodeType_Text otherwise; the
   first character that is not whitespace switches it to XmlNodeType_Text.
   The CDATA end marker inside the text returns WC_E_CDSECTEND.  When the
   next markup is reached the value covers body start up to the current
   position and the resume data is cleared.
   NOTE(review): the result of reader_parse_reference() is not used here, so
   a malformed or undeclared reference is not reported - confirm intent. */
2367 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2372 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2374 start
= reader
->resume
[XmlReadResume_Body
];
2375 ptr
= reader_get_ptr(reader
);
2379 reader_shrink(reader
);
2380 ptr
= reader_get_ptr(reader
);
2381 start
= reader_get_cur(reader
);
2382 /* There's no text */
2383 if (!*ptr
|| *ptr
== '<') return S_OK
;
2384 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2385 reader
->resume
[XmlReadResume_Body
] = start
;
2386 reader
->resumestate
= XmlReadResumeState_CharData
;
2387 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2388 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2389 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2394 static const WCHAR ampW
[] = {'&',0};
2396 /* CDATA closing sequence ']]>' is not allowed */
2397 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2398 return WC_E_CDSECTEND
;
2400 /* Found next markup part */
2405 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2406 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2407 reader
->resume
[XmlReadResume_Body
] = 0;
2408 reader
->resumestate
= XmlReadResumeState_Initial
;
2412 /* this covers a case when text has leading whitespace chars */
2413 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2415 if (!reader_cmp(reader
, ampW
))
2416 reader_parse_reference(reader
);
2418 reader_skipn(reader
, 1);
2420 ptr
= reader_get_ptr(reader
);
2426 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
/* Parses the next piece of element content.
   With a resume state other than Initial the interrupted CDATA, comment,
   processing instruction or character data parser is continued; any other
   resume state is logged with ERR.
   Otherwise the buffer is shrunk and the parser is picked by prefix, tested
   in this order: etagW, commentW, piW, cdstartW, ltW.  The longer markup
   prefixes have to be tested before the bare ltW they all start with.
   Anything else is handed to reader_parse_chardata(). */
2427 static HRESULT
reader_parse_content(xmlreader
*reader
)
2429 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2430 static const WCHAR etagW
[] = {'<','/',0};
2432 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2434 switch (reader
->resumestate
)
2436 case XmlReadResumeState_CDATA
:
2437 return reader_parse_cdata(reader
);
2438 case XmlReadResumeState_Comment
:
2439 return reader_parse_comment(reader
);
2440 case XmlReadResumeState_PIBody
:
2441 case XmlReadResumeState_PITarget
:
2442 return reader_parse_pi(reader
);
2443 case XmlReadResumeState_CharData
:
2444 return reader_parse_chardata(reader
);
2446 ERR("unknown resume state %d\n", reader
->resumestate
);
2450 reader_shrink(reader
);
2452 /* handle end tag here, it indicates end of content as well */
2453 if (!reader_cmp(reader
, etagW
))
2454 return reader_parse_endtag(reader
);
2456 if (!reader_cmp(reader
, commentW
))
2457 return reader_parse_comment(reader
);
2459 if (!reader_cmp(reader
, piW
))
2460 return reader_parse_pi(reader
);
2462 if (!reader_cmp(reader
, cdstartW
))
2463 return reader_parse_cdata(reader
);
2465 if (!reader_cmp(reader
, ltW
))
2466 return reader_parse_element(reader
);
2468 /* what's left must be CharData */
2469 return reader_parse_chardata(reader
);
/* Advances the reader to the next node.
   Attributes of the previous node are cleared unless the reader is pending.
   Leaving an empty element pops its namespace nodes, leaving an end element
   pops the element.  The work is then selected by reader->instate:
   Initial:  grows the raw input, detects the encoding and switches to it,
             parses the XML declaration, shrinks the raw input and moves to
             Misc_DTD.  An S_OK result of the declaration parser is returned
             right away.
   Misc_DTD, DTD, DTD_Misc: parse Misc items and the DTD; the visible
             transitions lead to DTD, DTD_Misc and Element.
   Element:  returns the result of reader_parse_element().
   Content:  returns the result of reader_parse_content().
   MiscEnd:  parses trailing Misc items, then moves to Eof with node type
             XmlNodeType_None.
   Any other state is reported with FIXME.  Failures of the called parsers
   are returned unchanged. */
2472 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2474 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2477 if (!is_reader_pending(reader
))
2478 reader_clear_attrs(reader
);
2480 /* When moving from EndElement or empty element, pop its own namespace definitions */
2481 if (nodetype
== XmlNodeType_Element
&& reader
->is_empty_element
)
2482 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2483 else if (nodetype
== XmlNodeType_EndElement
)
2484 reader_pop_element(reader
);
2488 switch (reader
->instate
)
2490 /* if it's a first call for a new input we need to detect stream encoding */
2491 case XmlReadInState_Initial
:
2495 hr
= readerinput_growraw(reader
->input
);
2496 if (FAILED(hr
)) return hr
;
2498 /* try to detect encoding by BOM or data and set input code page */
2499 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2500 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2501 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2502 if (FAILED(hr
)) return hr
;
2504 /* always switch first time cause we have to put something in */
2505 readerinput_switchencoding(reader
->input
, enc
);
2507 /* parse xml declaration */
2508 hr
= reader_parse_xmldecl(reader
);
2509 if (FAILED(hr
)) return hr
;
2511 readerinput_shrinkraw(reader
->input
, -1);
2512 reader
->instate
= XmlReadInState_Misc_DTD
;
2513 if (hr
== S_OK
) return hr
;
2516 case XmlReadInState_Misc_DTD
:
2517 hr
= reader_parse_misc(reader
);
2518 if (FAILED(hr
)) return hr
;
2521 reader
->instate
= XmlReadInState_DTD
;
2525 case XmlReadInState_DTD
:
2526 hr
= reader_parse_dtd(reader
);
2527 if (FAILED(hr
)) return hr
;
2531 reader
->instate
= XmlReadInState_DTD_Misc
;
2535 reader
->instate
= XmlReadInState_Element
;
2537 case XmlReadInState_DTD_Misc
:
2538 hr
= reader_parse_misc(reader
);
2539 if (FAILED(hr
)) return hr
;
2542 reader
->instate
= XmlReadInState_Element
;
2546 case XmlReadInState_Element
:
2547 return reader_parse_element(reader
);
2548 case XmlReadInState_Content
:
2549 return reader_parse_content(reader
);
2550 case XmlReadInState_MiscEnd
:
2551 hr
= reader_parse_misc(reader
);
2552 if (FAILED(hr
)) return hr
;
2556 reader
->instate
= XmlReadInState_Eof
;
2557 reader
->nodetype
= XmlNodeType_None
;
2560 case XmlReadInState_Eof
:
2563 FIXME("internal state %d not handled\n", reader
->instate
);
/* IUnknown::QueryInterface for the reader object.
   IID_IUnknown and IID_IXmlReader are the interface ids tested for.  The
   unsupported case is logged with FIXME and returns E_NOINTERFACE.  A
   reference is added through IXmlReader_AddRef() on the remaining path. */
2571 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2573 xmlreader
*This
= impl_from_IXmlReader(iface
);
2575 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2577 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2578 IsEqualGUID(riid
, &IID_IXmlReader
))
2584 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2586 return E_NOINTERFACE
;
2589 IXmlReader_AddRef(iface
);
/* IUnknown::AddRef for the reader object: increments This->ref with
   InterlockedIncrement() and traces the new count. */
2594 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2596 xmlreader
*This
= impl_from_IXmlReader(iface
);
2597 ULONG ref
= InterlockedIncrement(&This
->ref
);
2598 TRACE("(%p)->(%d)\n", This
, ref
);
2602 static void reader_clear_ns(xmlreader
*reader
)
2604 struct ns
*ns
, *ns2
;
2606 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2607 reader_free_strvalued(reader
, &ns
->prefix
);
2608 reader_free_strvalued(reader
, &ns
->uri
);
2609 reader_free(reader
, ns
);
2612 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2613 reader_free_strvalued(reader
, &ns
->uri
);
2614 reader_free(reader
, ns
);
2618 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2620 xmlreader
*This
= impl_from_IXmlReader(iface
);
2621 LONG ref
= InterlockedDecrement(&This
->ref
);
2623 TRACE("(%p)->(%d)\n", This
, ref
);
2627 IMalloc
*imalloc
= This
->imalloc
;
2628 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2629 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2630 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2631 reader_clear_attrs(This
);
2632 reader_clear_ns(This
);
2633 reader_clear_elements(This
);
2634 reader_free_strvalues(This
);
2635 reader_free(This
, This
);
2636 if (imalloc
) IMalloc_Release(imalloc
);
2642 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2644 xmlreader
*This
= impl_from_IXmlReader(iface
);
2645 IXmlReaderInput
*readerinput
;
2648 TRACE("(%p)->(%p)\n", This
, input
);
2652 readerinput_release_stream(This
->input
);
2653 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2657 This
->line
= This
->pos
= 0;
2658 reader_clear_elements(This
);
2660 This
->resumestate
= XmlReadResumeState_Initial
;
2661 memset(This
->resume
, 0, sizeof(This
->resume
));
2663 /* just reset current input */
2666 This
->state
= XmlReadState_Initial
;
2670 /* now try IXmlReaderInput, ISequentialStream, IStream */
2671 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2674 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2675 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2678 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2679 readerinput
, readerinput
->lpVtbl
);
2680 IUnknown_Release(readerinput
);
2686 if (hr
!= S_OK
|| !readerinput
)
2688 /* create IXmlReaderInput basing on supplied interface */
2689 hr
= CreateXmlReaderInputWithEncodingName(input
,
2690 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2691 if (hr
!= S_OK
) return hr
;
2692 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2695 /* set stream for supplied IXmlReaderInput */
2696 hr
= readerinput_query_for_stream(This
->input
);
2699 This
->state
= XmlReadState_Initial
;
2700 This
->instate
= XmlReadInState_Initial
;
2706 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2708 xmlreader
*This
= impl_from_IXmlReader(iface
);
2710 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2712 if (!value
) return E_INVALIDARG
;
2716 case XmlReaderProperty_MultiLanguage
:
2717 *value
= (LONG_PTR
)This
->mlang
;
2719 IUnknown_AddRef(This
->mlang
);
2721 case XmlReaderProperty_XmlResolver
:
2722 *value
= (LONG_PTR
)This
->resolver
;
2724 IXmlResolver_AddRef(This
->resolver
);
2726 case XmlReaderProperty_DtdProcessing
:
2727 *value
= This
->dtdmode
;
2729 case XmlReaderProperty_ReadState
:
2730 *value
= This
->state
;
2733 FIXME("Unimplemented property (%u)\n", property
);
2740 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2742 xmlreader
*This
= impl_from_IXmlReader(iface
);
2744 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2748 case XmlReaderProperty_MultiLanguage
:
2750 IUnknown_Release(This
->mlang
);
2751 This
->mlang
= (IUnknown
*)value
;
2753 IUnknown_AddRef(This
->mlang
);
2755 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2757 case XmlReaderProperty_XmlResolver
:
2759 IXmlResolver_Release(This
->resolver
);
2760 This
->resolver
= (IXmlResolver
*)value
;
2762 IXmlResolver_AddRef(This
->resolver
);
2764 case XmlReaderProperty_DtdProcessing
:
2765 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2766 This
->dtdmode
= value
;
2768 case XmlReaderProperty_MaxElementDepth
:
2769 FIXME("Ignoring MaxElementDepth %ld\n", value
);
2772 FIXME("Unimplemented property (%u)\n", property
);
2779 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2781 xmlreader
*This
= impl_from_IXmlReader(iface
);
2782 XmlNodeType oldtype
= This
->nodetype
;
2785 TRACE("(%p)->(%p)\n", This
, nodetype
);
2787 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2789 hr
= reader_parse_nextnode(This
);
2790 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2791 This
->state
= XmlReadState_Interactive
;
2793 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2795 *nodetype
= This
->nodetype
;
2800 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2802 xmlreader
*This
= impl_from_IXmlReader(iface
);
2804 TRACE("(%p)->(%p)\n", This
, node_type
);
2807 return E_INVALIDARG
;
2809 *node_type
= reader_get_nodetype(This
);
2810 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2813 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2815 if (!reader
->attr_count
)
2818 reader
->attr
= LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
);
2819 reader_set_strvalue(reader
, StringValue_Prefix
, &reader
->attr
->prefix
);
2820 reader_set_strvalue(reader
, StringValue_LocalName
, &reader
->attr
->localname
);
2821 reader_set_strvalue(reader
, StringValue_Value
, &reader
->attr
->value
);
2826 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2828 xmlreader
*This
= impl_from_IXmlReader(iface
);
2830 TRACE("(%p)\n", This
);
2832 return reader_move_to_first_attribute(This
);
2835 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2837 xmlreader
*This
= impl_from_IXmlReader(iface
);
2838 const struct list
*next
;
2840 TRACE("(%p)\n", This
);
2842 if (!This
->attr_count
) return S_FALSE
;
2845 return reader_move_to_first_attribute(This
);
2847 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2850 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2851 reader_set_strvalue(This
, StringValue_Prefix
, &This
->attr
->prefix
);
2852 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2853 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2856 return next
? S_OK
: S_FALSE
;
2859 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2861 LPCWSTR namespaceUri
)
2863 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2867 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2869 xmlreader
*This
= impl_from_IXmlReader(iface
);
2871 TRACE("(%p)\n", This
);
2873 if (!This
->attr_count
) return S_FALSE
;
2876 /* FIXME: support other node types with 'attributes' like DTD */
2877 if (This
->is_empty_element
) {
2878 reader_set_strvalue(This
, StringValue_LocalName
, &This
->empty_element
.localname
);
2879 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
2882 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2884 reader_set_strvalue(This
, StringValue_LocalName
, &element
->localname
);
2885 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
2892 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2894 xmlreader
*This
= impl_from_IXmlReader(iface
);
2896 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2897 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2898 if (len
) *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2902 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
2904 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
2907 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
2908 if (strval_eq(reader
, prefix
, &ns
->prefix
))
2915 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
2917 if (list_empty(&reader
->nsdef
))
2920 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
2923 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
2925 xmlreader
*This
= impl_from_IXmlReader(iface
);
2926 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
2927 XmlNodeType nodetype
;
2931 TRACE("(%p %p %p)\n", iface
, uri
, len
);
2939 switch ((nodetype
= reader_get_nodetype(This
)))
2941 case XmlNodeType_Attribute
:
2943 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2944 '2','0','0','0','/','x','m','l','n','s','/',0};
2945 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2946 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2947 const strval
*local
= &This
->strvalues
[StringValue_LocalName
];
2949 /* check for reserved prefixes first */
2950 if ((strval_eq(This
, prefix
, &strval_empty
) && strval_eq(This
, local
, &strval_xmlns
)) ||
2951 strval_eq(This
, prefix
, &strval_xmlns
))
2954 *len
= sizeof(xmlns_uriW
)/sizeof(xmlns_uriW
[0]) - 1;
2956 else if (strval_eq(This
, prefix
, &strval_xml
)) {
2958 *len
= sizeof(xml_uriW
)/sizeof(xml_uriW
[0]) - 1;
2962 ns
= reader_lookup_ns(This
, prefix
);
2974 case XmlNodeType_Element
:
2975 case XmlNodeType_EndElement
:
2977 ns
= reader_lookup_ns(This
, prefix
);
2979 /* pick top default ns if any */
2981 ns
= reader_lookup_nsdef(This
);
2994 FIXME("Unhandled node type %d\n", nodetype
);
3001 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3003 xmlreader
*This
= impl_from_IXmlReader(iface
);
3005 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3006 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3007 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
3011 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
3013 xmlreader
*This
= impl_from_IXmlReader(iface
);
3015 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
3016 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
3017 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
3021 static BOOL
is_namespace_definition(xmlreader
*reader
)
3023 const strval
*local
= &reader
->strvalues
[StringValue_LocalName
];
3024 const strval
*prefix
= &reader
->strvalues
[StringValue_Prefix
];
3026 if (reader_get_nodetype(reader
) != XmlNodeType_Attribute
)
3029 return ((strval_eq(reader
, prefix
, &strval_empty
) && strval_eq(reader
, local
, &strval_xmlns
)) ||
3030 strval_eq(reader
, prefix
, &strval_xmlns
));
3033 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3035 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3036 strval
*val
= &reader
->strvalues
[StringValue_Value
];
3038 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3042 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
3047 hr
= IXmlReader_Read(iface
, &type
);
3048 if (FAILED(hr
)) return hr
;
3050 /* return if still pending, partially read values are not reported */
3051 if (is_reader_pending(reader
)) return E_PENDING
;
3056 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3057 if (!ptr
) return E_OUTOFMEMORY
;
3058 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3063 /* For namespace definition attributes return values from namespace list */
3064 if (is_namespace_definition(reader
)) {
3065 const strval
*local
= &reader
->strvalues
[StringValue_LocalName
];
3068 ns
= reader_lookup_ns(reader
, local
);
3070 ns
= reader_lookup_nsdef(reader
);
3076 if (len
) *len
= val
->len
;
3080 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3082 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3083 strval
*val
= &reader
->strvalues
[StringValue_Value
];
3086 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3088 /* Value is already allocated, chunked reads are not possible. */
3089 if (val
->str
) return S_FALSE
;
3093 len
= min(chunk_size
, val
->len
);
3094 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
3097 if (read
) *read
= len
;
3103 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3105 UINT
*baseUri_length
)
3107 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
3111 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3113 FIXME("(%p): stub\n", iface
);
3117 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3119 xmlreader
*This
= impl_from_IXmlReader(iface
);
3120 TRACE("(%p)\n", This
);
3121 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3122 when current node is start tag of an element */
3123 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3126 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
3128 xmlreader
*This
= impl_from_IXmlReader(iface
);
3130 TRACE("(%p %p)\n", This
, lineNumber
);
3132 if (!lineNumber
) return E_INVALIDARG
;
3134 *lineNumber
= This
->line
;
3139 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
3141 xmlreader
*This
= impl_from_IXmlReader(iface
);
3143 TRACE("(%p %p)\n", This
, linePosition
);
3145 if (!linePosition
) return E_INVALIDARG
;
3147 *linePosition
= This
->pos
;
3152 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3154 xmlreader
*This
= impl_from_IXmlReader(iface
);
3156 TRACE("(%p)->(%p)\n", This
, count
);
3158 if (!count
) return E_INVALIDARG
;
3160 *count
= This
->attr_count
;
3164 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3166 xmlreader
*This
= impl_from_IXmlReader(iface
);
3167 TRACE("(%p)->(%p)\n", This
, depth
);
3168 *depth
= This
->depth
;
3172 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3174 FIXME("(%p): stub\n", iface
);
3178 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3180 xmlreader_QueryInterface
,
3184 xmlreader_GetProperty
,
3185 xmlreader_SetProperty
,
3187 xmlreader_GetNodeType
,
3188 xmlreader_MoveToFirstAttribute
,
3189 xmlreader_MoveToNextAttribute
,
3190 xmlreader_MoveToAttributeByName
,
3191 xmlreader_MoveToElement
,
3192 xmlreader_GetQualifiedName
,
3193 xmlreader_GetNamespaceUri
,
3194 xmlreader_GetLocalName
,
3195 xmlreader_GetPrefix
,
3197 xmlreader_ReadValueChunk
,
3198 xmlreader_GetBaseUri
,
3199 xmlreader_IsDefault
,
3200 xmlreader_IsEmptyElement
,
3201 xmlreader_GetLineNumber
,
3202 xmlreader_GetLinePosition
,
3203 xmlreader_GetAttributeCount
,
3208 /** IXmlReaderInput **/
3209 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3211 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3213 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3215 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3216 IsEqualGUID(riid
, &IID_IUnknown
))
3222 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3224 return E_NOINTERFACE
;
3227 IUnknown_AddRef(iface
);
3232 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3234 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3235 ULONG ref
= InterlockedIncrement(&This
->ref
);
3236 TRACE("(%p)->(%d)\n", This
, ref
);
3240 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3242 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3243 LONG ref
= InterlockedDecrement(&This
->ref
);
3245 TRACE("(%p)->(%d)\n", This
, ref
);
3249 IMalloc
*imalloc
= This
->imalloc
;
3250 if (This
->input
) IUnknown_Release(This
->input
);
3251 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3252 if (This
->buffer
) free_input_buffer(This
->buffer
);
3253 readerinput_free(This
, This
->baseuri
);
3254 readerinput_free(This
, This
);
3255 if (imalloc
) IMalloc_Release(imalloc
);
3261 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3263 xmlreaderinput_QueryInterface
,
3264 xmlreaderinput_AddRef
,
3265 xmlreaderinput_Release
3268 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3273 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3275 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
3277 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
3282 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3284 reader
= heap_alloc(sizeof(*reader
));
3285 if(!reader
) return E_OUTOFMEMORY
;
3287 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3289 reader
->input
= NULL
;
3290 reader
->state
= XmlReadState_Closed
;
3291 reader
->instate
= XmlReadInState_Initial
;
3292 reader
->resumestate
= XmlReadResumeState_Initial
;
3293 reader
->dtdmode
= DtdProcessing_Prohibit
;
3294 reader
->resolver
= NULL
;
3295 reader
->mlang
= NULL
;
3296 reader
->line
= reader
->pos
= 0;
3297 reader
->imalloc
= imalloc
;
3298 if (imalloc
) IMalloc_AddRef(imalloc
);
3299 reader
->nodetype
= XmlNodeType_None
;
3300 list_init(&reader
->attrs
);
3301 reader
->attr_count
= 0;
3302 reader
->attr
= NULL
;
3303 list_init(&reader
->nsdef
);
3304 list_init(&reader
->ns
);
3305 list_init(&reader
->elements
);
3307 reader
->max_depth
= 256;
3308 reader
->is_empty_element
= FALSE
;
3309 memset(reader
->resume
, 0, sizeof(reader
->resume
));
3311 for (i
= 0; i
< StringValue_Last
; i
++)
3312 reader
->strvalues
[i
] = strval_empty
;
3314 *obj
= &reader
->IXmlReader_iface
;
3316 TRACE("returning iface %p\n", *obj
);
3321 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3326 IXmlReaderInput
**ppInput
)
3328 xmlreaderinput
*readerinput
;
3331 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3332 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3334 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3337 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3339 readerinput
= heap_alloc(sizeof(*readerinput
));
3340 if(!readerinput
) return E_OUTOFMEMORY
;
3342 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3343 readerinput
->ref
= 1;
3344 readerinput
->imalloc
= imalloc
;
3345 readerinput
->stream
= NULL
;
3346 if (imalloc
) IMalloc_AddRef(imalloc
);
3347 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3348 readerinput
->hint
= hint
;
3349 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3350 readerinput
->pending
= 0;
3352 hr
= alloc_input_buffer(readerinput
);
3355 readerinput_free(readerinput
, readerinput
->baseuri
);
3356 readerinput_free(readerinput
, readerinput
);
3357 if (imalloc
) IMalloc_Release(imalloc
);
3360 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3362 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3364 TRACE("returning iface %p\n", *ppInput
);