2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
35 #include "wine/unicode.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
39 /* not defined in public headers */
40 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
44 XmlReadInState_Initial
,
45 XmlReadInState_XmlDecl
,
46 XmlReadInState_Misc_DTD
,
48 XmlReadInState_DTD_Misc
,
49 XmlReadInState_Element
,
50 XmlReadInState_Content
,
51 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
53 } XmlReaderInternalState
;
55 /* This state denotes where parsing was interrupted by input problem.
56 Reader resumes parsing using this information. */
59 XmlReadResumeState_Initial
,
60 XmlReadResumeState_PITarget
,
61 XmlReadResumeState_PIBody
,
62 XmlReadResumeState_CDATA
,
63 XmlReadResumeState_Comment
,
64 XmlReadResumeState_STag
,
65 XmlReadResumeState_CharData
,
66 XmlReadResumeState_Whitespace
67 } XmlReaderResumeState
;
69 /* saved pointer index to resume from particular input position */
72 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
73 XmlReadResume_Local
, /* local for QName */
74 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
80 StringValue_LocalName
,
82 StringValue_QualifiedName
,
85 } XmlReaderStringValue
;
87 static const WCHAR usasciiW
[] = {'U','S','-','A','S','C','I','I',0};
88 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
89 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
91 static const WCHAR dblquoteW
[] = {'\"',0};
92 static const WCHAR quoteW
[] = {'\'',0};
93 static const WCHAR ltW
[] = {'<',0};
94 static const WCHAR gtW
[] = {'>',0};
95 static const WCHAR commentW
[] = {'<','!','-','-',0};
96 static const WCHAR piW
[] = {'<','?',0};
98 static BOOL
is_namestartchar(WCHAR ch
);
100 static const char *debugstr_nodetype(XmlNodeType nodetype
)
102 static const char * const type_names
[] =
111 "ProcessingInstruction",
124 if (nodetype
> _XmlNodeType_Last
)
125 return wine_dbg_sprintf("unknown type=%d", nodetype
);
127 return type_names
[nodetype
];
130 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
132 static const char * const prop_names
[] =
144 if (prop
> _XmlReaderProperty_Last
)
145 return wine_dbg_sprintf("unknown property=%d", prop
);
147 return prop_names
[prop
];
150 struct xml_encoding_data
157 static const struct xml_encoding_data xml_encoding_map
[] = {
158 { usasciiW
, XmlEncoding_USASCII
, 20127 },
159 { utf16W
, XmlEncoding_UTF16
, 1200 },
160 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
},
163 const WCHAR
*get_encoding_name(xml_encoding encoding
)
165 return xml_encoding_map
[encoding
].name
;
168 xml_encoding
get_encoding_from_codepage(UINT codepage
)
171 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
173 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
175 return XmlEncoding_Unknown
;
182 unsigned int allocated
;
183 unsigned int written
;
187 typedef struct input_buffer input_buffer
;
191 IXmlReaderInput IXmlReaderInput_iface
;
193 /* reference passed on IXmlReaderInput creation, is kept when input is created */
196 xml_encoding encoding
;
199 /* stream reference set after SetInput() call from reader,
200 stored as sequential stream, cause currently
201 optimizations possible with IStream aren't implemented */
202 ISequentialStream
*stream
;
203 input_buffer
*buffer
;
204 unsigned int pending
: 1;
207 static const struct IUnknownVtbl xmlreaderinputvtbl
;
209 /* Structure to hold parsed string of specific length.
211 Reader stores node value as 'start' pointer, on request
212 a null-terminated version of it is allocated.
214 To init a strval variable use reader_init_strval(),
215 to set strval as a reader value use reader_set_strval().
219 WCHAR
*str
; /* allocated null-terminated string */
220 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
221 UINT start
; /* input position where value starts */
224 static WCHAR emptyW
[] = {0};
225 static WCHAR xmlW
[] = {'x','m','l',0};
226 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
227 static const strval strval_empty
= { emptyW
};
228 static const strval strval_xml
= { xmlW
, 3 };
229 static const strval strval_xmlns
= { xmlnsW
, 5 };
231 struct reader_position
239 ATTRIBUTE_NS_DEFINITION
= 0x1,
240 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
250 struct reader_position position
;
260 struct reader_position position
;
268 struct element
*element
;
273 IXmlReader IXmlReader_iface
;
275 xmlreaderinput
*input
;
278 HRESULT error
; /* error set on XmlReadState_Error */
279 XmlReaderInternalState instate
;
280 XmlReaderResumeState resumestate
;
281 XmlNodeType nodetype
;
282 DtdProcessing dtdmode
;
283 IXmlResolver
*resolver
;
285 struct reader_position position
;
286 struct list attrs
; /* attributes list for current node */
287 struct attribute
*attr
; /* current attribute */
291 struct list elements
;
293 strval strvalues
[StringValue_Last
];
296 BOOL is_empty_element
;
297 struct element empty_element
; /* used for empty elements without end tag <a />,
298 and to keep <?xml reader position */
299 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
304 encoded_buffer utf16
;
305 encoded_buffer encoded
;
307 xmlreaderinput
*input
;
310 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
312 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
315 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
317 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
320 /* reader memory allocation functions */
321 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
323 return m_alloc(reader
->imalloc
, len
);
326 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
328 void *ret
= reader_alloc(reader
, len
);
334 static inline void reader_free(xmlreader
*reader
, void *mem
)
336 m_free(reader
->imalloc
, mem
);
339 /* Just return pointer from offset, no attempt to read more. */
340 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
342 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
343 return (WCHAR
*)buffer
->data
+ offset
;
346 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
348 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
351 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
355 if (src
->str
!= strval_empty
.str
)
357 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
358 if (!dest
->str
) return E_OUTOFMEMORY
;
359 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
360 dest
->str
[dest
->len
] = 0;
367 /* reader input memory allocation functions */
368 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
370 return m_alloc(input
->imalloc
, len
);
373 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
375 return m_realloc(input
->imalloc
, mem
, len
);
378 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
380 m_free(input
->imalloc
, mem
);
383 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
390 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
391 ret
= readerinput_alloc(input
, size
);
392 if (ret
) memcpy(ret
, str
, size
);
398 /* This one frees stored string value if needed */
399 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
401 if (v
->str
!= strval_empty
.str
)
403 reader_free(reader
, v
->str
);
408 static void reader_clear_attrs(xmlreader
*reader
)
410 struct attribute
*attr
, *attr2
;
411 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
413 reader_free_strvalued(reader
, &attr
->localname
);
414 reader_free_strvalued(reader
, &attr
->value
);
415 reader_free(reader
, attr
);
417 list_init(&reader
->attrs
);
418 reader
->attr_count
= 0;
422 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
423 while we are on a node with attributes */
424 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
425 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
427 struct attribute
*attr
;
430 attr
= reader_alloc(reader
, sizeof(*attr
));
431 if (!attr
) return E_OUTOFMEMORY
;
433 hr
= reader_strvaldup(reader
, localname
, &attr
->localname
);
436 hr
= reader_strvaldup(reader
, value
, &attr
->value
);
438 reader_free_strvalued(reader
, &attr
->value
);
442 reader_free(reader
, attr
);
447 attr
->prefix
= *prefix
;
449 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
450 attr
->qname
= qname
? *qname
: *localname
;
451 attr
->position
= *position
;
453 list_add_tail(&reader
->attrs
, &attr
->entry
);
454 reader
->attr_count
++;
459 /* Returns current element, doesn't check if reader is actually positioned on it. */
460 static struct element
*reader_get_element(xmlreader
*reader
)
462 if (reader
->is_empty_element
)
463 return &reader
->empty_element
;
465 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
468 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
475 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
477 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
480 /* used to initialize from constant string */
481 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
488 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
490 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
493 static void reader_free_strvalues(xmlreader
*reader
)
496 for (type
= 0; type
< StringValue_Last
; type
++)
497 reader_free_strvalue(reader
, type
);
500 /* This helper should only be used to test if strings are the same,
501 it doesn't try to sort. */
502 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
504 if (str1
->len
!= str2
->len
) return 0;
505 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
508 static void reader_clear_elements(xmlreader
*reader
)
510 struct element
*elem
, *elem2
;
511 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
513 reader_free_strvalued(reader
, &elem
->prefix
);
514 reader_free_strvalued(reader
, &elem
->localname
);
515 reader_free_strvalued(reader
, &elem
->qname
);
516 reader_free(reader
, elem
);
518 list_init(&reader
->elements
);
519 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
520 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
521 reader
->is_empty_element
= FALSE
;
524 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
526 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
529 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
530 if (strval_eq(reader
, prefix
, &ns
->prefix
))
537 static HRESULT
reader_inc_depth(xmlreader
*reader
)
539 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
542 static void reader_dec_depth(xmlreader
*reader
)
548 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
553 ns
= reader_alloc(reader
, sizeof(*ns
));
554 if (!ns
) return E_OUTOFMEMORY
;
557 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
559 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
561 reader_free(reader
, ns
);
566 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
568 reader_free_strvalued(reader
, &ns
->prefix
);
569 reader_free(reader
, ns
);
574 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
578 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
580 reader_free_strvalued(reader
, &element
->prefix
);
581 reader_free_strvalued(reader
, &element
->localname
);
582 reader_free_strvalued(reader
, &element
->qname
);
583 reader_free(reader
, element
);
586 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
590 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
593 ns
->element
= element
;
596 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
599 ns
->element
= element
;
603 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
604 strval
*qname
, const struct reader_position
*position
)
606 struct element
*element
;
609 element
= reader_alloc_zero(reader
, sizeof(*element
));
611 return E_OUTOFMEMORY
;
613 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
614 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
615 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
617 list_add_head(&reader
->elements
, &element
->entry
);
618 reader_mark_ns_nodes(reader
, element
);
619 reader
->is_empty_element
= FALSE
;
620 element
->position
= *position
;
623 reader_free_element(reader
, element
);
628 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
632 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
633 if (ns
->element
!= element
)
636 list_remove(&ns
->entry
);
637 reader_free_strvalued(reader
, &ns
->prefix
);
638 reader_free_strvalued(reader
, &ns
->uri
);
639 reader_free(reader
, ns
);
642 if (!list_empty(&reader
->nsdef
)) {
643 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
644 if (ns
->element
== element
) {
645 list_remove(&ns
->entry
);
646 reader_free_strvalued(reader
, &ns
->prefix
);
647 reader_free_strvalued(reader
, &ns
->uri
);
648 reader_free(reader
, ns
);
653 static void reader_pop_element(xmlreader
*reader
)
655 struct element
*element
;
657 if (list_empty(&reader
->elements
))
660 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
661 list_remove(&element
->entry
);
663 reader_pop_ns_nodes(reader
, element
);
664 reader_free_element(reader
, element
);
666 /* It was a root element, the rest is expected as Misc */
667 if (list_empty(&reader
->elements
))
668 reader
->instate
= XmlReadInState_MiscEnd
;
671 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
672 means node value is to be determined. */
673 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
675 strval
*v
= &reader
->strvalues
[type
];
677 reader_free_strvalue(reader
, type
);
686 if (value
->str
== strval_empty
.str
)
690 if (type
== StringValue_Value
)
692 /* defer allocation for value string */
694 v
->start
= value
->start
;
699 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
700 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
701 v
->str
[value
->len
] = 0;
707 static inline int is_reader_pending(xmlreader
*reader
)
709 return reader
->input
->pending
;
712 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
714 const int initial_len
= 0x2000;
715 buffer
->data
= readerinput_alloc(input
, initial_len
);
716 if (!buffer
->data
) return E_OUTOFMEMORY
;
718 memset(buffer
->data
, 0, 4);
720 buffer
->allocated
= initial_len
;
722 buffer
->prev_cr
= FALSE
;
727 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
729 readerinput_free(input
, buffer
->data
);
732 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
734 if (encoding
== XmlEncoding_Unknown
)
736 FIXME("unsupported encoding %d\n", encoding
);
740 *cp
= xml_encoding_map
[encoding
].cp
;
745 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
749 if (!name
) return XmlEncoding_Unknown
;
752 max
= sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]) - 1;
759 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
761 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
763 return xml_encoding_map
[n
].enc
;
771 return XmlEncoding_Unknown
;
774 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
776 input_buffer
*buffer
;
779 input
->buffer
= NULL
;
781 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
782 if (!buffer
) return E_OUTOFMEMORY
;
784 buffer
->input
= input
;
785 buffer
->code_page
= ~0; /* code page is unknown at this point */
786 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
788 readerinput_free(input
, buffer
);
792 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
794 free_encoded_buffer(input
, &buffer
->utf16
);
795 readerinput_free(input
, buffer
);
799 input
->buffer
= buffer
;
803 static void free_input_buffer(input_buffer
*buffer
)
805 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
806 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
807 readerinput_free(buffer
->input
, buffer
);
810 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
812 if (readerinput
->stream
) {
813 ISequentialStream_Release(readerinput
->stream
);
814 readerinput
->stream
= NULL
;
818 /* Queries already stored interface for IStream/ISequentialStream.
819 Interface supplied on creation will be overwritten */
820 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
824 readerinput_release_stream(readerinput
);
825 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
827 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
832 /* reads a chunk to raw buffer */
833 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
835 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
836 /* to make sure aligned length won't exceed allocated length */
837 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
841 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
842 variable width encodings like UTF-8 */
843 len
= (len
+ 3) & ~3;
844 /* try to use allocated space or grow */
845 if (buffer
->allocated
- buffer
->written
< len
)
847 buffer
->allocated
*= 2;
848 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
849 len
= buffer
->allocated
- buffer
->written
;
853 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
854 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
855 readerinput
->pending
= hr
== E_PENDING
;
856 if (FAILED(hr
)) return hr
;
857 buffer
->written
+= read
;
862 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
863 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
865 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
867 length
*= sizeof(WCHAR
);
868 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
869 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
871 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
872 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
873 buffer
->allocated
= grown_size
;
877 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
879 static const char startA
[] = {'<','?'};
880 static const char commentA
[] = {'<','!'};
881 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
882 unsigned char *ptr
= (unsigned char*)buffer
->data
;
884 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
885 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
886 /* test start byte */
889 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
890 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
891 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
892 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
896 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
898 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
899 static const char utf8bom
[] = {0xef,0xbb,0xbf};
900 static const char utf16lebom
[] = {0xff,0xfe};
903 *enc
= XmlEncoding_Unknown
;
905 if (buffer
->written
<= 3)
907 HRESULT hr
= readerinput_growraw(readerinput
);
908 if (FAILED(hr
)) return hr
;
909 if (buffer
->written
< 3) return MX_E_INPUTEND
;
912 ptrW
= (WCHAR
*)buffer
->data
;
913 /* try start symbols if we have enough data to do that, input buffer should contain
914 first chunk already */
915 if (readerinput_is_utf8(readerinput
))
916 *enc
= XmlEncoding_UTF8
;
917 else if (*ptrW
== '<')
920 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
921 *enc
= XmlEncoding_UTF16
;
923 /* try with BOM now */
924 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
926 buffer
->cur
+= sizeof(utf8bom
);
927 *enc
= XmlEncoding_UTF8
;
929 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
931 buffer
->cur
+= sizeof(utf16lebom
);
932 *enc
= XmlEncoding_UTF16
;
938 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
940 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
941 int len
= buffer
->written
;
943 /* complete single byte char */
944 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
946 /* find start byte of multibyte char */
947 while (--len
&& !(buffer
->data
[len
] & 0xc0))
953 /* Returns byte length of complete char sequence for buffer code page,
954 it's relative to current buffer position which is currently used for BOM handling
956 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
958 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
961 if (readerinput
->buffer
->code_page
== CP_UTF8
)
962 len
= readerinput_get_utf8_convlen(readerinput
);
964 len
= buffer
->written
;
966 TRACE("%d\n", len
- buffer
->cur
);
967 return len
- buffer
->cur
;
970 /* It's possible that raw buffer has some leftovers from last conversion - some char
971 sequence that doesn't represent a full code point. Length argument should be calculated with
972 readerinput_get_convlen(), if it's -1 it will be calculated here. */
973 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
975 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
978 len
= readerinput_get_convlen(readerinput
);
980 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
981 /* everything below cur is lost too */
982 buffer
->written
-= len
+ buffer
->cur
;
983 /* after this point we don't need cur offset really,
984 it's used only to mark where actual data begins when first chunk is read */
988 static void fixup_buffer_cr(encoded_buffer
*buffer
, int off
)
990 BOOL prev_cr
= buffer
->prev_cr
;
994 src
= dest
= (WCHAR
*)buffer
->data
+ off
;
995 while ((const char*)src
< buffer
->data
+ buffer
->written
)
1004 if(prev_cr
&& *src
== '\n')
1011 buffer
->written
= (char*)dest
- buffer
->data
;
1012 buffer
->prev_cr
= prev_cr
;
1016 /* note that raw buffer content is kept */
1017 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
1019 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1020 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1026 hr
= get_code_page(enc
, &cp
);
1027 if (FAILED(hr
)) return;
1029 readerinput
->buffer
->code_page
= cp
;
1030 len
= readerinput_get_convlen(readerinput
);
1032 TRACE("switching to cp %d\n", cp
);
1034 /* just copy in this case */
1035 if (enc
== XmlEncoding_UTF16
)
1037 readerinput_grow(readerinput
, len
);
1038 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
1039 dest
->written
+= len
*sizeof(WCHAR
);
1043 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1044 readerinput_grow(readerinput
, dest_len
);
1045 ptr
= (WCHAR
*)dest
->data
;
1046 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1048 dest
->written
+= dest_len
*sizeof(WCHAR
);
1051 fixup_buffer_cr(dest
, 0);
1054 /* shrinks parsed data a buffer begins with */
1055 static void reader_shrink(xmlreader
*reader
)
1057 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1059 /* avoid to move too often using threshold shrink length */
1060 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
1062 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
1063 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1065 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1069 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1070 It won't attempt to shrink but will grow destination buffer if needed */
1071 static HRESULT
reader_more(xmlreader
*reader
)
1073 xmlreaderinput
*readerinput
= reader
->input
;
1074 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1075 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1076 UINT cp
= readerinput
->buffer
->code_page
;
1077 int len
, dest_len
, prev_len
;
1081 /* get some raw data from stream first */
1082 hr
= readerinput_growraw(readerinput
);
1083 len
= readerinput_get_convlen(readerinput
);
1084 prev_len
= dest
->written
/ sizeof(WCHAR
);
1086 /* just copy for UTF-16 case */
1089 readerinput_grow(readerinput
, len
);
1090 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1091 dest
->written
+= len
*sizeof(WCHAR
);
1095 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1096 readerinput_grow(readerinput
, dest_len
);
1097 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1098 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1100 dest
->written
+= dest_len
*sizeof(WCHAR
);
1101 /* get rid of processed data */
1102 readerinput_shrinkraw(readerinput
, len
);
1105 fixup_buffer_cr(dest
, prev_len
);
1109 static inline UINT
reader_get_cur(xmlreader
*reader
)
1111 return reader
->input
->buffer
->utf16
.cur
;
1114 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1116 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1117 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1118 if (!*ptr
) reader_more(reader
);
1119 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1122 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1125 const WCHAR
*ptr
= reader_get_ptr(reader
);
1130 reader_more(reader
);
1131 ptr
= reader_get_ptr(reader
);
1133 if (str
[i
] != ptr
[i
])
1134 return ptr
[i
] - str
[i
];
1140 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1143 reader
->position
.line_position
= 1;
1144 else if (ch
== '\n')
1146 reader
->position
.line_number
++;
1147 reader
->position
.line_position
= 1;
1150 reader
->position
.line_position
++;
1153 /* moves cursor n WCHARs forward */
1154 static void reader_skipn(xmlreader
*reader
, int n
)
1156 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1159 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1161 reader_update_position(reader
, *ptr
);
1166 static inline BOOL
is_wchar_space(WCHAR ch
)
1168 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1171 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1172 static int reader_skipspaces(xmlreader
*reader
)
1174 const WCHAR
*ptr
= reader_get_ptr(reader
);
1175 UINT start
= reader_get_cur(reader
);
1177 while (is_wchar_space(*ptr
))
1179 reader_skipn(reader
, 1);
1180 ptr
= reader_get_ptr(reader
);
1183 return reader_get_cur(reader
) - start
;
1186 /* [26] VersionNum ::= '1.' [0-9]+ */
1187 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1189 static const WCHAR onedotW
[] = {'1','.',0};
1193 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1195 start
= reader_get_cur(reader
);
1197 reader_skipn(reader
, 2);
1199 ptr2
= ptr
= reader_get_ptr(reader
);
1200 while (*ptr
>= '0' && *ptr
<= '9')
1202 reader_skipn(reader
, 1);
1203 ptr
= reader_get_ptr(reader
);
1206 if (ptr2
== ptr
) return WC_E_DIGIT
;
1207 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1208 TRACE("version=%s\n", debug_strval(reader
, val
));
1212 /* [25] Eq ::= S? '=' S? */
1213 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1215 static const WCHAR eqW
[] = {'=',0};
1216 reader_skipspaces(reader
);
1217 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1219 reader_skipn(reader
, 1);
1220 reader_skipspaces(reader
);
1224 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1225 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1227 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1228 struct reader_position position
;
1232 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1234 position
= reader
->position
;
1235 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1236 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1237 /* skip 'version' */
1238 reader_skipn(reader
, 7);
1240 hr
= reader_parse_eq(reader
);
1241 if (FAILED(hr
)) return hr
;
1243 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1246 reader_skipn(reader
, 1);
1248 hr
= reader_parse_versionnum(reader
, &val
);
1249 if (FAILED(hr
)) return hr
;
1251 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1255 reader_skipn(reader
, 1);
1257 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1260 /* ([A-Za-z0-9._] | '-') */
1261 static inline BOOL
is_wchar_encname(WCHAR ch
)
1263 return ((ch
>= 'A' && ch
<= 'Z') ||
1264 (ch
>= 'a' && ch
<= 'z') ||
1265 (ch
>= '0' && ch
<= '9') ||
1266 (ch
== '.') || (ch
== '_') ||
1270 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1271 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1273 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1277 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1278 return WC_E_ENCNAME
;
1280 val
->start
= reader_get_cur(reader
);
1283 while (is_wchar_encname(*++ptr
))
1287 enc
= parse_encoding_name(start
, len
);
1288 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1292 if (enc
== XmlEncoding_Unknown
)
1293 return WC_E_ENCNAME
;
1295 /* skip encoding name */
1296 reader_skipn(reader
, len
);
1300 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1301 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1303 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1304 struct reader_position position
;
1308 if (!reader_skipspaces(reader
)) return S_FALSE
;
1310 position
= reader
->position
;
1311 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1312 name
.str
= reader_get_ptr(reader
);
1313 name
.start
= reader_get_cur(reader
);
1315 /* skip 'encoding' */
1316 reader_skipn(reader
, 8);
1318 hr
= reader_parse_eq(reader
);
1319 if (FAILED(hr
)) return hr
;
1321 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1324 reader_skipn(reader
, 1);
1326 hr
= reader_parse_encname(reader
, &val
);
1327 if (FAILED(hr
)) return hr
;
1329 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1333 reader_skipn(reader
, 1);
1335 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1338 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1339 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1341 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1342 static const WCHAR yesW
[] = {'y','e','s',0};
1343 static const WCHAR noW
[] = {'n','o',0};
1344 struct reader_position position
;
1349 if (!reader_skipspaces(reader
)) return S_FALSE
;
1351 position
= reader
->position
;
1352 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1353 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1354 /* skip 'standalone' */
1355 reader_skipn(reader
, 10);
1357 hr
= reader_parse_eq(reader
);
1358 if (FAILED(hr
)) return hr
;
1360 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1363 reader_skipn(reader
, 1);
1365 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1366 return WC_E_XMLDECL
;
1368 start
= reader_get_cur(reader
);
1369 /* skip 'yes'|'no' */
1370 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1371 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1372 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1374 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1377 reader_skipn(reader
, 1);
1379 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1382 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1383 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1385 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1386 static const WCHAR declcloseW
[] = {'?','>',0};
1387 struct reader_position position
;
1390 /* check if we have "<?xml " */
1391 if (reader_cmp(reader
, xmldeclW
))
1394 reader_skipn(reader
, 2);
1395 position
= reader
->position
;
1396 reader_skipn(reader
, 3);
1397 hr
= reader_parse_versioninfo(reader
);
1401 hr
= reader_parse_encdecl(reader
);
1405 hr
= reader_parse_sddecl(reader
);
1409 reader_skipspaces(reader
);
1410 if (reader_cmp(reader
, declcloseW
))
1411 return WC_E_XMLDECL
;
1414 reader_skipn(reader
, 2);
1416 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1417 reader
->empty_element
.position
= position
;
1418 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1419 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1424 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1425 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1430 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1432 start
= reader
->resume
[XmlReadResume_Body
];
1433 ptr
= reader_get_ptr(reader
);
1438 reader_skipn(reader
, 4);
1439 reader_shrink(reader
);
1440 ptr
= reader_get_ptr(reader
);
1441 start
= reader_get_cur(reader
);
1442 reader
->nodetype
= XmlNodeType_Comment
;
1443 reader
->resume
[XmlReadResume_Body
] = start
;
1444 reader
->resumestate
= XmlReadResumeState_Comment
;
1445 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1448 /* will exit when there's no more data, it won't attempt to
1449 read more from stream */
1460 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1461 TRACE("%s\n", debug_strval(reader
, &value
));
1463 /* skip closing markup '-->' */
1464 reader_skipn(reader
, 3);
1466 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1467 reader
->resume
[XmlReadResume_Body
] = 0;
1468 reader
->resumestate
= XmlReadResumeState_Initial
;
1472 return WC_E_COMMENT
;
1476 reader_skipn(reader
, 1);
1483 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1484 static inline BOOL
is_char(WCHAR ch
)
1486 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1487 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1488 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1489 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1490 (ch
>= 0xe000 && ch
<= 0xfffd);
1493 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1494 static inline BOOL
is_pubchar(WCHAR ch
)
1496 return (ch
== ' ') ||
1497 (ch
>= 'a' && ch
<= 'z') ||
1498 (ch
>= 'A' && ch
<= 'Z') ||
1499 (ch
>= '0' && ch
<= '9') ||
1500 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1501 (ch
== '=') || (ch
== '?') ||
1502 (ch
== '@') || (ch
== '!') ||
1503 (ch
>= '#' && ch
<= '%') || /* #$% */
1504 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1507 static inline BOOL
is_namestartchar(WCHAR ch
)
1509 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1510 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1511 (ch
>= 0xc0 && ch
<= 0xd6) ||
1512 (ch
>= 0xd8 && ch
<= 0xf6) ||
1513 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1514 (ch
>= 0x370 && ch
<= 0x37d) ||
1515 (ch
>= 0x37f && ch
<= 0x1fff) ||
1516 (ch
>= 0x200c && ch
<= 0x200d) ||
1517 (ch
>= 0x2070 && ch
<= 0x218f) ||
1518 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1519 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1520 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1521 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1522 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1523 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1526 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1527 static inline BOOL
is_ncnamechar(WCHAR ch
)
1529 return (ch
>= 'A' && ch
<= 'Z') ||
1530 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1531 (ch
== '-') || (ch
== '.') ||
1532 (ch
>= '0' && ch
<= '9') ||
1534 (ch
>= 0xc0 && ch
<= 0xd6) ||
1535 (ch
>= 0xd8 && ch
<= 0xf6) ||
1536 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1537 (ch
>= 0x300 && ch
<= 0x36f) ||
1538 (ch
>= 0x370 && ch
<= 0x37d) ||
1539 (ch
>= 0x37f && ch
<= 0x1fff) ||
1540 (ch
>= 0x200c && ch
<= 0x200d) ||
1541 (ch
>= 0x203f && ch
<= 0x2040) ||
1542 (ch
>= 0x2070 && ch
<= 0x218f) ||
1543 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1544 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1545 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1546 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1547 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1548 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1551 static inline BOOL
is_namechar(WCHAR ch
)
1553 return (ch
== ':') || is_ncnamechar(ch
);
1556 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1558 /* When we're on attribute always return attribute type, container node type is kept.
1559 Note that container is not necessarily an element, and attribute doesn't mean it's
1560 an attribute in XML spec terms. */
1561 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1564 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1565 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1566 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1567 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1568 [5] Name ::= NameStartChar (NameChar)* */
1569 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1574 if (reader
->resume
[XmlReadResume_Name
])
1576 start
= reader
->resume
[XmlReadResume_Name
];
1577 ptr
= reader_get_ptr(reader
);
1581 ptr
= reader_get_ptr(reader
);
1582 start
= reader_get_cur(reader
);
1583 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1586 while (is_namechar(*ptr
))
1588 reader_skipn(reader
, 1);
1589 ptr
= reader_get_ptr(reader
);
1592 if (is_reader_pending(reader
))
1594 reader
->resume
[XmlReadResume_Name
] = start
;
1598 reader
->resume
[XmlReadResume_Name
] = 0;
1600 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1601 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1606 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1607 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1609 static const WCHAR xmlW
[] = {'x','m','l'};
1610 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1616 hr
= reader_parse_name(reader
, &name
);
1617 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1619 /* now that we got name check for illegal content */
1620 if (strval_eq(reader
, &name
, &xmlval
))
1621 return WC_E_LEADINGXML
;
1623 /* PITarget can't be a qualified name */
1624 ptr
= reader_get_strptr(reader
, &name
);
1625 for (i
= 0; i
< name
.len
; i
++)
1627 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1629 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1634 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1635 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1642 switch (reader
->resumestate
)
1644 case XmlReadResumeState_Initial
:
1646 reader_skipn(reader
, 2);
1647 reader_shrink(reader
);
1648 reader
->resumestate
= XmlReadResumeState_PITarget
;
1649 case XmlReadResumeState_PITarget
:
1650 hr
= reader_parse_pitarget(reader
, &target
);
1651 if (FAILED(hr
)) return hr
;
1652 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1653 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1654 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1655 reader
->resumestate
= XmlReadResumeState_PIBody
;
1656 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1661 start
= reader
->resume
[XmlReadResume_Body
];
1662 ptr
= reader_get_ptr(reader
);
1669 UINT cur
= reader_get_cur(reader
);
1672 /* strip all leading whitespace chars */
1675 ptr
= reader_get_ptr2(reader
, start
);
1676 if (!is_wchar_space(*ptr
)) break;
1680 reader_init_strvalue(start
, cur
-start
, &value
);
1683 reader_skipn(reader
, 2);
1684 TRACE("%s\n", debug_strval(reader
, &value
));
1685 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1686 reader
->resumestate
= XmlReadResumeState_Initial
;
1687 reader
->resume
[XmlReadResume_Body
] = 0;
1688 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1693 reader_skipn(reader
, 1);
1694 ptr
= reader_get_ptr(reader
);
1700 /* This one is used to parse significant whitespace nodes, like in Misc production */
1701 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1703 switch (reader
->resumestate
)
1705 case XmlReadResumeState_Initial
:
1706 reader_shrink(reader
);
1707 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1708 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1709 reader
->nodetype
= XmlNodeType_Whitespace
;
1710 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1711 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1712 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1714 case XmlReadResumeState_Whitespace
:
1719 reader_skipspaces(reader
);
1720 if (is_reader_pending(reader
)) return S_OK
;
1722 start
= reader
->resume
[XmlReadResume_Body
];
1723 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1724 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1725 TRACE("%s\n", debug_strval(reader
, &value
));
1726 reader
->resumestate
= XmlReadResumeState_Initial
;
1735 /* [27] Misc ::= Comment | PI | S */
1736 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1738 HRESULT hr
= S_FALSE
;
1740 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1742 hr
= reader_more(reader
);
1743 if (FAILED(hr
)) return hr
;
1745 /* finish current node */
1746 switch (reader
->resumestate
)
1748 case XmlReadResumeState_PITarget
:
1749 case XmlReadResumeState_PIBody
:
1750 return reader_parse_pi(reader
);
1751 case XmlReadResumeState_Comment
:
1752 return reader_parse_comment(reader
);
1753 case XmlReadResumeState_Whitespace
:
1754 return reader_parse_whitespace(reader
);
1756 ERR("unknown resume state %d\n", reader
->resumestate
);
1762 const WCHAR
*cur
= reader_get_ptr(reader
);
1764 if (is_wchar_space(*cur
))
1765 hr
= reader_parse_whitespace(reader
);
1766 else if (!reader_cmp(reader
, commentW
))
1767 hr
= reader_parse_comment(reader
);
1768 else if (!reader_cmp(reader
, piW
))
1769 hr
= reader_parse_pi(reader
);
1773 if (hr
!= S_FALSE
) return hr
;
1779 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1780 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1782 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1785 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1788 reader_skipn(reader
, 1);
1790 cur
= reader_get_ptr(reader
);
1791 start
= reader_get_cur(reader
);
1792 while (is_char(*cur
) && *cur
!= quote
)
1794 reader_skipn(reader
, 1);
1795 cur
= reader_get_ptr(reader
);
1797 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1798 if (*cur
== quote
) reader_skipn(reader
, 1);
1800 TRACE("%s\n", debug_strval(reader
, literal
));
1804 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1805 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1806 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1808 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1811 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1814 reader_skipn(reader
, 1);
1816 start
= reader_get_cur(reader
);
1817 cur
= reader_get_ptr(reader
);
1818 while (is_pubchar(*cur
) && *cur
!= quote
)
1820 reader_skipn(reader
, 1);
1821 cur
= reader_get_ptr(reader
);
1823 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1824 if (*cur
== quote
) reader_skipn(reader
, 1);
1826 TRACE("%s\n", debug_strval(reader
, literal
));
1830 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1831 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1833 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1834 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1835 struct reader_position position
= reader
->position
;
1840 if (!reader_cmp(reader
, publicW
)) {
1844 reader_skipn(reader
, 6);
1845 cnt
= reader_skipspaces(reader
);
1846 if (!cnt
) return WC_E_WHITESPACE
;
1848 hr
= reader_parse_pub_literal(reader
, &pub
);
1849 if (FAILED(hr
)) return hr
;
1851 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1852 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1853 if (FAILED(hr
)) return hr
;
1855 cnt
= reader_skipspaces(reader
);
1856 if (!cnt
) return S_OK
;
1858 /* optional system id */
1859 hr
= reader_parse_sys_literal(reader
, &sys
);
1860 if (FAILED(hr
)) return S_OK
;
1862 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1863 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1864 if (FAILED(hr
)) return hr
;
1867 } else if (!reader_cmp(reader
, systemW
)) {
1869 reader_skipn(reader
, 6);
1870 cnt
= reader_skipspaces(reader
);
1871 if (!cnt
) return WC_E_WHITESPACE
;
1873 hr
= reader_parse_sys_literal(reader
, &sys
);
1874 if (FAILED(hr
)) return hr
;
1876 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1877 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1883 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1884 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1886 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1891 /* check if we have "<!DOCTYPE" */
1892 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1893 reader_shrink(reader
);
1895 /* DTD processing is not allowed by default */
1896 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1898 reader_skipn(reader
, 9);
1899 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1902 hr
= reader_parse_name(reader
, &name
);
1903 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1905 reader_skipspaces(reader
);
1907 hr
= reader_parse_externalid(reader
);
1908 if (FAILED(hr
)) return hr
;
1910 reader_skipspaces(reader
);
1912 cur
= reader_get_ptr(reader
);
1915 FIXME("internal subset parsing not implemented\n");
1920 reader_skipn(reader
, 1);
1922 reader
->nodetype
= XmlNodeType_DocumentType
;
1923 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1924 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1929 /* [11 NS] LocalPart ::= NCName */
1930 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1935 if (reader
->resume
[XmlReadResume_Local
])
1937 start
= reader
->resume
[XmlReadResume_Local
];
1938 ptr
= reader_get_ptr(reader
);
1942 ptr
= reader_get_ptr(reader
);
1943 start
= reader_get_cur(reader
);
1946 while (is_ncnamechar(*ptr
))
1948 reader_skipn(reader
, 1);
1949 ptr
= reader_get_ptr(reader
);
1952 if (check_for_separator
&& *ptr
== ':')
1953 return NC_E_QNAMECOLON
;
1955 if (is_reader_pending(reader
))
1957 reader
->resume
[XmlReadResume_Local
] = start
;
1961 reader
->resume
[XmlReadResume_Local
] = 0;
1963 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1968 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1969 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1970 [9 NS] UnprefixedName ::= LocalPart
1971 [10 NS] Prefix ::= NCName */
1972 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1978 if (reader
->resume
[XmlReadResume_Name
])
1980 start
= reader
->resume
[XmlReadResume_Name
];
1981 ptr
= reader_get_ptr(reader
);
1985 ptr
= reader_get_ptr(reader
);
1986 start
= reader_get_cur(reader
);
1987 reader
->resume
[XmlReadResume_Name
] = start
;
1988 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1991 if (reader
->resume
[XmlReadResume_Local
])
1993 hr
= reader_parse_local(reader
, local
, FALSE
);
1994 if (FAILED(hr
)) return hr
;
1996 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1997 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
2002 /* skip prefix part */
2003 while (is_ncnamechar(*ptr
))
2005 reader_skipn(reader
, 1);
2006 ptr
= reader_get_ptr(reader
);
2009 if (is_reader_pending(reader
)) return E_PENDING
;
2011 /* got a qualified name */
2014 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
2017 reader_skipn(reader
, 1);
2018 hr
= reader_parse_local(reader
, local
, TRUE
);
2019 if (FAILED(hr
)) return hr
;
2023 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
2024 reader_init_strvalue(0, 0, prefix
);
2029 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
2031 TRACE("ncname %s\n", debug_strval(reader
, local
));
2033 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
2035 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
2038 reader
->resume
[XmlReadResume_Name
] = 0;
2039 reader
->resume
[XmlReadResume_Local
] = 0;
2044 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2046 static const WCHAR entltW
[] = {'l','t'};
2047 static const WCHAR entgtW
[] = {'g','t'};
2048 static const WCHAR entampW
[] = {'a','m','p'};
2049 static const WCHAR entaposW
[] = {'a','p','o','s'};
2050 static const WCHAR entquotW
[] = {'q','u','o','t'};
2051 static const strval lt
= { (WCHAR
*)entltW
, 2 };
2052 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
2053 static const strval amp
= { (WCHAR
*)entampW
, 3 };
2054 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
2055 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
2056 WCHAR
*str
= reader_get_strptr(reader
, name
);
2061 if (strval_eq(reader
, name
, <
)) return '<';
2064 if (strval_eq(reader
, name
, >
)) return '>';
2067 if (strval_eq(reader
, name
, &
))
2069 else if (strval_eq(reader
, name
, &apos
))
2073 if (strval_eq(reader
, name
, "
)) return '\"';
2082 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2083 [67] Reference ::= EntityRef | CharRef
2084 [68] EntityRef ::= '&' Name ';' */
2085 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2087 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2088 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2089 UINT cur
= reader_get_cur(reader
);
2094 reader_skipn(reader
, 1);
2095 ptr
= reader_get_ptr(reader
);
2099 reader_skipn(reader
, 1);
2100 ptr
= reader_get_ptr(reader
);
2102 /* hex char or decimal */
2105 reader_skipn(reader
, 1);
2106 ptr
= reader_get_ptr(reader
);
2110 if ((*ptr
>= '0' && *ptr
<= '9'))
2111 ch
= ch
*16 + *ptr
- '0';
2112 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2113 ch
= ch
*16 + *ptr
- 'a' + 10;
2114 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2115 ch
= ch
*16 + *ptr
- 'A' + 10;
2117 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2118 reader_skipn(reader
, 1);
2119 ptr
= reader_get_ptr(reader
);
2126 if ((*ptr
>= '0' && *ptr
<= '9'))
2128 ch
= ch
*10 + *ptr
- '0';
2129 reader_skipn(reader
, 1);
2130 ptr
= reader_get_ptr(reader
);
2133 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2137 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2140 if (is_wchar_space(ch
)) ch
= ' ';
2142 ptr
= reader_get_ptr(reader
);
2143 start
= reader_get_ptr2(reader
, cur
);
2144 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2145 memmove(start
+ 1, ptr
+ 1, len
);
2147 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2148 buffer
->cur
= cur
+ 1;
2157 hr
= reader_parse_name(reader
, &name
);
2158 if (FAILED(hr
)) return hr
;
2160 ptr
= reader_get_ptr(reader
);
2161 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2163 /* predefined entities resolve to a single character */
2164 ch
= get_predefined_entity(reader
, &name
);
2167 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2168 memmove(start
+1, ptr
+1, len
);
2169 buffer
->cur
= cur
+ 1;
2170 buffer
->written
-= (ptr
- start
) * sizeof(WCHAR
);
2176 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2177 return WC_E_UNDECLAREDENTITY
;
2185 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2186 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2191 ptr
= reader_get_ptr(reader
);
2193 /* skip opening quote */
2195 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2196 reader_skipn(reader
, 1);
2198 ptr
= reader_get_ptr(reader
);
2199 start
= reader_get_cur(reader
);
2202 if (*ptr
== '<') return WC_E_LESSTHAN
;
2206 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2207 /* skip closing quote */
2208 reader_skipn(reader
, 1);
2214 HRESULT hr
= reader_parse_reference(reader
);
2215 if (FAILED(hr
)) return hr
;
2219 /* replace all whitespace chars with ' ' */
2220 if (is_wchar_space(*ptr
)) *ptr
= ' ';
2221 reader_skipn(reader
, 1);
2223 ptr
= reader_get_ptr(reader
);
2229 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2230 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2231 [3 NS] DefaultAttName ::= 'xmlns'
2232 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2233 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2235 struct reader_position position
= reader
->position
;
2236 strval prefix
, local
, qname
, value
;
2237 enum attribute_flags flags
= 0;
2240 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2241 if (FAILED(hr
)) return hr
;
2243 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2244 flags
|= ATTRIBUTE_NS_DEFINITION
;
2246 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2247 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2249 hr
= reader_parse_eq(reader
);
2250 if (FAILED(hr
)) return hr
;
2252 hr
= reader_parse_attvalue(reader
, &value
);
2253 if (FAILED(hr
)) return hr
;
2255 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2256 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2258 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2259 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2262 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2263 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2264 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2266 struct reader_position position
= reader
->position
;
2269 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2270 if (FAILED(hr
)) return hr
;
2274 static const WCHAR endW
[] = {'/','>',0};
2276 reader_skipspaces(reader
);
2279 if ((reader
->is_empty_element
= !reader_cmp(reader
, endW
)))
2281 struct element
*element
= &reader
->empty_element
;
2284 reader_skipn(reader
, 2);
2286 reader_free_strvalued(reader
, &element
->qname
);
2287 reader_free_strvalued(reader
, &element
->localname
);
2289 element
->prefix
= *prefix
;
2290 reader_strvaldup(reader
, qname
, &element
->qname
);
2291 reader_strvaldup(reader
, local
, &element
->localname
);
2292 element
->position
= position
;
2293 reader_mark_ns_nodes(reader
, element
);
2297 /* got a start tag */
2298 if (!reader_cmp(reader
, gtW
))
2301 reader_skipn(reader
, 1);
2302 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2305 hr
= reader_parse_attribute(reader
);
2306 if (FAILED(hr
)) return hr
;
2312 /* [39] element ::= EmptyElemTag | STag content ETag */
2313 static HRESULT
reader_parse_element(xmlreader
*reader
)
2317 switch (reader
->resumestate
)
2319 case XmlReadResumeState_Initial
:
2320 /* check if we are really on element */
2321 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2324 reader_skipn(reader
, 1);
2326 reader_shrink(reader
);
2327 reader
->resumestate
= XmlReadResumeState_STag
;
2328 case XmlReadResumeState_STag
:
2330 strval qname
, prefix
, local
;
2332 /* this handles empty elements too */
2333 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2334 if (FAILED(hr
)) return hr
;
2336 /* FIXME: need to check for defined namespace to reject invalid prefix */
2338 /* if we got empty element and stack is empty go straight to Misc */
2339 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2340 reader
->instate
= XmlReadInState_MiscEnd
;
2342 reader
->instate
= XmlReadInState_Content
;
2344 reader
->nodetype
= XmlNodeType_Element
;
2345 reader
->resumestate
= XmlReadResumeState_Initial
;
2346 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2347 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2348 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2358 /* [13 NS] ETag ::= '</' QName S? '>' */
2359 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2361 struct reader_position position
;
2362 strval prefix
, local
, qname
;
2363 struct element
*element
;
2367 reader_skipn(reader
, 2);
2369 position
= reader
->position
;
2370 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2371 if (FAILED(hr
)) return hr
;
2373 reader_skipspaces(reader
);
2375 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2378 reader_skipn(reader
, 1);
2380 /* Element stack should never be empty at this point, cause we shouldn't get to
2381 content parsing if it's empty. */
2382 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2383 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2385 /* update position stored for start tag, we won't be using it */
2386 element
->position
= position
;
2388 reader
->nodetype
= XmlNodeType_EndElement
;
2389 reader
->is_empty_element
= FALSE
;
2390 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2395 /* [18] CDSect ::= CDStart CData CDEnd
2396 [19] CDStart ::= '<![CDATA['
2397 [20] CData ::= (Char* - (Char* ']]>' Char*))
2398 [21] CDEnd ::= ']]>' */
2399 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2404 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2406 start
= reader
->resume
[XmlReadResume_Body
];
2407 ptr
= reader_get_ptr(reader
);
2411 /* skip markup '<![CDATA[' */
2412 reader_skipn(reader
, 9);
2413 reader_shrink(reader
);
2414 ptr
= reader_get_ptr(reader
);
2415 start
= reader_get_cur(reader
);
2416 reader
->nodetype
= XmlNodeType_CDATA
;
2417 reader
->resume
[XmlReadResume_Body
] = start
;
2418 reader
->resumestate
= XmlReadResumeState_CDATA
;
2419 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2424 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2428 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2431 reader_skipn(reader
, 3);
2432 TRACE("%s\n", debug_strval(reader
, &value
));
2434 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2435 reader
->resume
[XmlReadResume_Body
] = 0;
2436 reader
->resumestate
= XmlReadResumeState_Initial
;
2441 reader_skipn(reader
, 1);
2449 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2450 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2452 struct reader_position position
;
2456 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2458 start
= reader
->resume
[XmlReadResume_Body
];
2459 ptr
= reader_get_ptr(reader
);
2463 reader_shrink(reader
);
2464 ptr
= reader_get_ptr(reader
);
2465 start
= reader_get_cur(reader
);
2466 /* There's no text */
2467 if (!*ptr
|| *ptr
== '<') return S_OK
;
2468 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2469 reader
->resume
[XmlReadResume_Body
] = start
;
2470 reader
->resumestate
= XmlReadResumeState_CharData
;
2471 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2474 position
= reader
->position
;
2477 static const WCHAR ampW
[] = {'&',0};
2479 /* CDATA closing sequence ']]>' is not allowed */
2480 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2481 return WC_E_CDSECTEND
;
2483 /* Found next markup part */
2488 reader
->empty_element
.position
= position
;
2489 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2490 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2491 reader
->resume
[XmlReadResume_Body
] = 0;
2492 reader
->resumestate
= XmlReadResumeState_Initial
;
2496 /* this covers a case when text has leading whitespace chars */
2497 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2499 if (!reader_cmp(reader
, ampW
))
2500 reader_parse_reference(reader
);
2502 reader_skipn(reader
, 1);
2504 ptr
= reader_get_ptr(reader
);
2510 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2511 static HRESULT
reader_parse_content(xmlreader
*reader
)
2513 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2514 static const WCHAR etagW
[] = {'<','/',0};
2516 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2518 switch (reader
->resumestate
)
2520 case XmlReadResumeState_CDATA
:
2521 return reader_parse_cdata(reader
);
2522 case XmlReadResumeState_Comment
:
2523 return reader_parse_comment(reader
);
2524 case XmlReadResumeState_PIBody
:
2525 case XmlReadResumeState_PITarget
:
2526 return reader_parse_pi(reader
);
2527 case XmlReadResumeState_CharData
:
2528 return reader_parse_chardata(reader
);
2530 ERR("unknown resume state %d\n", reader
->resumestate
);
2534 reader_shrink(reader
);
2536 /* handle end tag here, it indicates end of content as well */
2537 if (!reader_cmp(reader
, etagW
))
2538 return reader_parse_endtag(reader
);
2540 if (!reader_cmp(reader
, commentW
))
2541 return reader_parse_comment(reader
);
2543 if (!reader_cmp(reader
, piW
))
2544 return reader_parse_pi(reader
);
2546 if (!reader_cmp(reader
, cdstartW
))
2547 return reader_parse_cdata(reader
);
2549 if (!reader_cmp(reader
, ltW
))
2550 return reader_parse_element(reader
);
2552 /* what's left must be CharData */
2553 return reader_parse_chardata(reader
);
2556 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2558 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2561 if (!is_reader_pending(reader
))
2563 reader
->chunk_read_off
= 0;
2564 reader_clear_attrs(reader
);
2567 /* When moving from EndElement or empty element, pop its own namespace definitions */
2570 case XmlNodeType_Attribute
:
2571 reader_dec_depth(reader
);
2573 case XmlNodeType_Element
:
2574 if (reader
->is_empty_element
)
2575 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2576 else if (FAILED(hr
= reader_inc_depth(reader
)))
2579 case XmlNodeType_EndElement
:
2580 reader_pop_element(reader
);
2581 reader_dec_depth(reader
);
2589 switch (reader
->instate
)
2591 /* if it's a first call for a new input we need to detect stream encoding */
2592 case XmlReadInState_Initial
:
2596 hr
= readerinput_growraw(reader
->input
);
2597 if (FAILED(hr
)) return hr
;
2599 reader
->position
.line_number
= 1;
2600 reader
->position
.line_position
= 1;
2602 /* try to detect encoding by BOM or data and set input code page */
2603 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2604 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2605 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2606 if (FAILED(hr
)) return hr
;
2608 /* always switch first time cause we have to put something in */
2609 readerinput_switchencoding(reader
->input
, enc
);
2611 /* parse xml declaration */
2612 hr
= reader_parse_xmldecl(reader
);
2613 if (FAILED(hr
)) return hr
;
2615 readerinput_shrinkraw(reader
->input
, -1);
2616 reader
->instate
= XmlReadInState_Misc_DTD
;
2617 if (hr
== S_OK
) return hr
;
2620 case XmlReadInState_Misc_DTD
:
2621 hr
= reader_parse_misc(reader
);
2622 if (FAILED(hr
)) return hr
;
2625 reader
->instate
= XmlReadInState_DTD
;
2629 case XmlReadInState_DTD
:
2630 hr
= reader_parse_dtd(reader
);
2631 if (FAILED(hr
)) return hr
;
2635 reader
->instate
= XmlReadInState_DTD_Misc
;
2639 reader
->instate
= XmlReadInState_Element
;
2641 case XmlReadInState_DTD_Misc
:
2642 hr
= reader_parse_misc(reader
);
2643 if (FAILED(hr
)) return hr
;
2646 reader
->instate
= XmlReadInState_Element
;
2650 case XmlReadInState_Element
:
2651 return reader_parse_element(reader
);
2652 case XmlReadInState_Content
:
2653 return reader_parse_content(reader
);
2654 case XmlReadInState_MiscEnd
:
2655 hr
= reader_parse_misc(reader
);
2656 if (hr
!= S_FALSE
) return hr
;
2658 if (*reader_get_ptr(reader
))
2660 WARN("found garbage in the end of XML\n");
2664 reader
->instate
= XmlReadInState_Eof
;
2665 reader
->state
= XmlReadState_EndOfFile
;
2666 reader
->nodetype
= XmlNodeType_None
;
2668 case XmlReadInState_Eof
:
2671 FIXME("internal state %d not handled\n", reader
->instate
);
2679 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2681 xmlreader
*This
= impl_from_IXmlReader(iface
);
2683 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2685 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2686 IsEqualGUID(riid
, &IID_IXmlReader
))
2692 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2694 return E_NOINTERFACE
;
2697 IXmlReader_AddRef(iface
);
2702 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2704 xmlreader
*This
= impl_from_IXmlReader(iface
);
2705 ULONG ref
= InterlockedIncrement(&This
->ref
);
2706 TRACE("(%p)->(%d)\n", This
, ref
);
2710 static void reader_clear_ns(xmlreader
*reader
)
2712 struct ns
*ns
, *ns2
;
2714 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2715 list_remove(&ns
->entry
);
2716 reader_free_strvalued(reader
, &ns
->prefix
);
2717 reader_free_strvalued(reader
, &ns
->uri
);
2718 reader_free(reader
, ns
);
2721 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2722 list_remove(&ns
->entry
);
2723 reader_free_strvalued(reader
, &ns
->uri
);
2724 reader_free(reader
, ns
);
2728 static void reader_reset_parser(xmlreader
*reader
)
2730 reader
->position
.line_number
= 0;
2731 reader
->position
.line_position
= 0;
2733 reader_clear_elements(reader
);
2734 reader_clear_attrs(reader
);
2735 reader_clear_ns(reader
);
2736 reader_free_strvalues(reader
);
2739 reader
->nodetype
= XmlNodeType_None
;
2740 reader
->resumestate
= XmlReadResumeState_Initial
;
2741 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2742 reader
->is_empty_element
= FALSE
;
2745 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2747 xmlreader
*This
= impl_from_IXmlReader(iface
);
2748 LONG ref
= InterlockedDecrement(&This
->ref
);
2750 TRACE("(%p)->(%d)\n", This
, ref
);
2754 IMalloc
*imalloc
= This
->imalloc
;
2755 reader_reset_parser(This
);
2756 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2757 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2758 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2759 reader_free(This
, This
);
2760 if (imalloc
) IMalloc_Release(imalloc
);
2766 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2768 xmlreader
*This
= impl_from_IXmlReader(iface
);
2769 IXmlReaderInput
*readerinput
;
2772 TRACE("(%p)->(%p)\n", This
, input
);
2776 readerinput_release_stream(This
->input
);
2777 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2781 reader_reset_parser(This
);
2783 /* just reset current input */
2786 This
->state
= XmlReadState_Initial
;
2790 /* now try IXmlReaderInput, ISequentialStream, IStream */
2791 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2794 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2795 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2798 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2799 readerinput
, readerinput
->lpVtbl
);
2800 IUnknown_Release(readerinput
);
2806 if (hr
!= S_OK
|| !readerinput
)
2808 /* create IXmlReaderInput basing on supplied interface */
2809 hr
= CreateXmlReaderInputWithEncodingName(input
,
2810 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2811 if (hr
!= S_OK
) return hr
;
2812 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2815 /* set stream for supplied IXmlReaderInput */
2816 hr
= readerinput_query_for_stream(This
->input
);
2819 This
->state
= XmlReadState_Initial
;
2820 This
->instate
= XmlReadInState_Initial
;
2825 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2827 xmlreader
*This
= impl_from_IXmlReader(iface
);
2829 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2831 if (!value
) return E_INVALIDARG
;
2835 case XmlReaderProperty_MultiLanguage
:
2836 *value
= (LONG_PTR
)This
->mlang
;
2838 IUnknown_AddRef(This
->mlang
);
2840 case XmlReaderProperty_XmlResolver
:
2841 *value
= (LONG_PTR
)This
->resolver
;
2843 IXmlResolver_AddRef(This
->resolver
);
2845 case XmlReaderProperty_DtdProcessing
:
2846 *value
= This
->dtdmode
;
2848 case XmlReaderProperty_ReadState
:
2849 *value
= This
->state
;
2851 case XmlReaderProperty_MaxElementDepth
:
2852 *value
= This
->max_depth
;
2855 FIXME("Unimplemented property (%u)\n", property
);
2862 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2864 xmlreader
*This
= impl_from_IXmlReader(iface
);
2866 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2870 case XmlReaderProperty_MultiLanguage
:
2872 IUnknown_Release(This
->mlang
);
2873 This
->mlang
= (IUnknown
*)value
;
2875 IUnknown_AddRef(This
->mlang
);
2877 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2879 case XmlReaderProperty_XmlResolver
:
2881 IXmlResolver_Release(This
->resolver
);
2882 This
->resolver
= (IXmlResolver
*)value
;
2884 IXmlResolver_AddRef(This
->resolver
);
2886 case XmlReaderProperty_DtdProcessing
:
2887 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2888 This
->dtdmode
= value
;
2890 case XmlReaderProperty_MaxElementDepth
:
2891 This
->max_depth
= value
;
2894 FIXME("Unimplemented property (%u)\n", property
);
2901 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2903 xmlreader
*This
= impl_from_IXmlReader(iface
);
2904 XmlNodeType oldtype
= This
->nodetype
;
2908 TRACE("(%p)->(%p)\n", This
, nodetype
);
2913 switch (This
->state
)
2915 case XmlReadState_Closed
:
2918 case XmlReadState_Error
:
2922 hr
= reader_parse_nextnode(This
);
2923 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2924 This
->state
= XmlReadState_Interactive
;
2928 This
->state
= XmlReadState_Error
;
2929 This
->nodetype
= XmlNodeType_None
;
2935 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2936 *nodetype
= This
->nodetype
;
2941 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2943 xmlreader
*This
= impl_from_IXmlReader(iface
);
2945 TRACE("(%p)->(%p)\n", This
, node_type
);
2948 return E_INVALIDARG
;
2950 *node_type
= reader_get_nodetype(This
);
2951 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2954 static void reader_set_current_attribute(xmlreader
*reader
, struct attribute
*attr
)
2956 reader
->attr
= attr
;
2957 reader
->chunk_read_off
= 0;
2958 reader_set_strvalue(reader
, StringValue_Prefix
, &attr
->prefix
);
2959 reader_set_strvalue(reader
, StringValue_QualifiedName
, &attr
->qname
);
2960 reader_set_strvalue(reader
, StringValue_Value
, &attr
->value
);
2963 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2965 if (!reader
->attr_count
)
2969 reader_inc_depth(reader
);
2971 reader_set_current_attribute(reader
, LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
));
2976 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2978 xmlreader
*This
= impl_from_IXmlReader(iface
);
2980 TRACE("(%p)\n", This
);
2982 return reader_move_to_first_attribute(This
);
2985 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2987 xmlreader
*This
= impl_from_IXmlReader(iface
);
2988 const struct list
*next
;
2990 TRACE("(%p)\n", This
);
2992 if (!This
->attr_count
) return S_FALSE
;
2995 return reader_move_to_first_attribute(This
);
2997 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2999 reader_set_current_attribute(This
, LIST_ENTRY(next
, struct attribute
, entry
));
3001 return next
? S_OK
: S_FALSE
;
3004 static void reader_get_attribute_ns_uri(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**uri
, UINT
*len
)
3006 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3007 '2','0','0','0','/','x','m','l','n','s','/',0};
3008 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3011 /* Check for reserved prefixes first */
3012 if ((strval_eq(reader
, &attr
->prefix
, &strval_empty
) && strval_eq(reader
, &attr
->localname
, &strval_xmlns
)) ||
3013 strval_eq(reader
, &attr
->prefix
, &strval_xmlns
))
3016 *len
= sizeof(xmlns_uriW
)/sizeof(xmlns_uriW
[0]) - 1;
3018 else if (strval_eq(reader
, &attr
->prefix
, &strval_xml
))
3021 *len
= sizeof(xml_uriW
)/sizeof(xml_uriW
[0]) - 1;
3033 if ((ns
= reader_lookup_ns(reader
, &attr
->prefix
)))
3046 static void reader_get_attribute_local_name(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**name
, UINT
*len
)
3048 if (attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3053 else if (attr
->flags
& ATTRIBUTE_NS_DEFINITION
)
3055 const struct ns
*ns
= reader_lookup_ns(reader
, &attr
->localname
);
3056 *name
= ns
->prefix
.str
;
3057 *len
= ns
->prefix
.len
;
3061 *name
= attr
->localname
.str
;
3062 *len
= attr
->localname
.len
;
3066 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
3067 const WCHAR
*local_name
, const WCHAR
*namespace_uri
)
3069 xmlreader
*This
= impl_from_IXmlReader(iface
);
3070 UINT target_name_len
, target_uri_len
;
3071 struct attribute
*attr
;
3073 TRACE("(%p)->(%s %s)\n", This
, debugstr_w(local_name
), debugstr_w(namespace_uri
));
3076 return E_INVALIDARG
;
3078 if (!This
->attr_count
)
3082 namespace_uri
= emptyW
;
3084 target_name_len
= strlenW(local_name
);
3085 target_uri_len
= strlenW(namespace_uri
);
3087 LIST_FOR_EACH_ENTRY(attr
, &This
->attrs
, struct attribute
, entry
)
3089 UINT name_len
, uri_len
;
3090 const WCHAR
*name
, *uri
;
3092 reader_get_attribute_local_name(This
, attr
, &name
, &name_len
);
3093 reader_get_attribute_ns_uri(This
, attr
, &uri
, &uri_len
);
3095 if (name_len
== target_name_len
&& uri_len
== target_uri_len
&&
3096 !strcmpW(name
, local_name
) && !strcmpW(uri
, namespace_uri
))
3098 reader_set_current_attribute(This
, attr
);
3106 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
3108 xmlreader
*This
= impl_from_IXmlReader(iface
);
3110 TRACE("(%p)\n", This
);
3112 if (!This
->attr_count
) return S_FALSE
;
3115 reader_dec_depth(This
);
3119 /* FIXME: support other node types with 'attributes' like DTD */
3120 if (This
->is_empty_element
) {
3121 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
3122 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
3125 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
3127 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
3128 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
3131 This
->chunk_read_off
= 0;
3132 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
3137 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3139 xmlreader
*This
= impl_from_IXmlReader(iface
);
3140 struct attribute
*attribute
= This
->attr
;
3141 struct element
*element
;
3144 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3149 switch (reader_get_nodetype(This
))
3151 case XmlNodeType_Text
:
3152 case XmlNodeType_CDATA
:
3153 case XmlNodeType_Comment
:
3154 case XmlNodeType_Whitespace
:
3158 case XmlNodeType_Element
:
3159 case XmlNodeType_EndElement
:
3160 element
= reader_get_element(This
);
3161 if (element
->prefix
.len
)
3163 *name
= element
->qname
.str
;
3164 *len
= element
->qname
.len
;
3168 *name
= element
->localname
.str
;
3169 *len
= element
->localname
.len
;
3172 case XmlNodeType_Attribute
:
3173 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3177 } else if (attribute
->prefix
.len
)
3179 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3180 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3184 *name
= attribute
->localname
.str
;
3185 *len
= attribute
->localname
.len
;
3189 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3190 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3197 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3199 if (list_empty(&reader
->nsdef
))
3202 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
3205 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3207 xmlreader
*This
= impl_from_IXmlReader(iface
);
3208 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3209 XmlNodeType nodetype
;
3213 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3218 switch ((nodetype
= reader_get_nodetype(This
)))
3220 case XmlNodeType_Attribute
:
3221 reader_get_attribute_ns_uri(This
, This
->attr
, uri
, len
);
3223 case XmlNodeType_Element
:
3224 case XmlNodeType_EndElement
:
3226 ns
= reader_lookup_ns(This
, prefix
);
3228 /* pick top default ns if any */
3230 ns
= reader_lookup_nsdef(This
);
3242 case XmlNodeType_Text
:
3243 case XmlNodeType_CDATA
:
3244 case XmlNodeType_ProcessingInstruction
:
3245 case XmlNodeType_Comment
:
3246 case XmlNodeType_Whitespace
:
3247 case XmlNodeType_XmlDeclaration
:
3252 FIXME("Unhandled node type %d\n", nodetype
);
3261 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3263 xmlreader
*This
= impl_from_IXmlReader(iface
);
3264 struct element
*element
;
3267 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3272 switch (reader_get_nodetype(This
))
3274 case XmlNodeType_Text
:
3275 case XmlNodeType_CDATA
:
3276 case XmlNodeType_Comment
:
3277 case XmlNodeType_Whitespace
:
3281 case XmlNodeType_Element
:
3282 case XmlNodeType_EndElement
:
3283 element
= reader_get_element(This
);
3284 *name
= element
->localname
.str
;
3285 *len
= element
->localname
.len
;
3287 case XmlNodeType_Attribute
:
3288 reader_get_attribute_local_name(This
, This
->attr
, name
, len
);
3291 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3292 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3299 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3301 xmlreader
*This
= impl_from_IXmlReader(iface
);
3302 XmlNodeType nodetype
;
3305 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3313 switch ((nodetype
= reader_get_nodetype(This
)))
3315 case XmlNodeType_Element
:
3316 case XmlNodeType_EndElement
:
3317 case XmlNodeType_Attribute
:
3319 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3322 if (strval_eq(This
, prefix
, &strval_xml
))
3327 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3332 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3334 *ret
= ns
->prefix
.str
;
3335 *len
= ns
->prefix
.len
;
3347 static const strval
*reader_get_value(xmlreader
*reader
, BOOL ensure_allocated
)
3351 switch (reader_get_nodetype(reader
))
3353 case XmlNodeType_XmlDeclaration
:
3354 case XmlNodeType_EndElement
:
3355 case XmlNodeType_None
:
3356 return &strval_empty
;
3357 case XmlNodeType_Attribute
:
3358 /* For namespace definition attributes return values from namespace list */
3359 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3363 if (!(ns
= reader_lookup_ns(reader
, &reader
->attr
->localname
)))
3364 ns
= reader_lookup_nsdef(reader
);
3368 return &reader
->attr
->value
;
3373 val
= &reader
->strvalues
[StringValue_Value
];
3374 if (!val
->str
&& ensure_allocated
)
3376 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3377 if (!ptr
) return NULL
;
3378 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3386 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3388 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3389 const strval
*val
= &reader
->strvalues
[StringValue_Value
];
3392 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3396 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3401 hr
= IXmlReader_Read(iface
, &type
);
3402 if (FAILED(hr
)) return hr
;
3404 /* return if still pending, partially read values are not reported */
3405 if (is_reader_pending(reader
)) return E_PENDING
;
3408 val
= reader_get_value(reader
, TRUE
);
3410 return E_OUTOFMEMORY
;
3412 off
= abs(reader
->chunk_read_off
);
3413 assert(off
<= val
->len
);
3414 *value
= val
->str
+ off
;
3415 if (len
) *len
= val
->len
- off
;
3416 reader
->chunk_read_off
= -off
;
3420 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3422 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3426 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3428 val
= reader_get_value(reader
, FALSE
);
3430 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3431 if (reader
->chunk_read_off
>= 0)
3433 assert(reader
->chunk_read_off
<= val
->len
);
3434 len
= min(val
->len
- reader
->chunk_read_off
, chunk_size
);
3436 if (read
) *read
= len
;
3440 memcpy(buffer
, reader_get_strptr(reader
, val
) + reader
->chunk_read_off
, len
*sizeof(WCHAR
));
3441 reader
->chunk_read_off
+= len
;
3444 return len
|| !chunk_size
? S_OK
: S_FALSE
;
3447 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3449 UINT
*baseUri_length
)
3451 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
3455 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3457 FIXME("(%p): stub\n", iface
);
3461 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3463 xmlreader
*This
= impl_from_IXmlReader(iface
);
3464 TRACE("(%p)\n", This
);
3465 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3466 when current node is start tag of an element */
3467 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3470 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3472 xmlreader
*This
= impl_from_IXmlReader(iface
);
3473 const struct element
*element
;
3475 TRACE("(%p %p)\n", This
, line_number
);
3478 return E_INVALIDARG
;
3480 switch (reader_get_nodetype(This
))
3482 case XmlNodeType_Element
:
3483 case XmlNodeType_EndElement
:
3484 element
= reader_get_element(This
);
3485 *line_number
= element
->position
.line_number
;
3487 case XmlNodeType_Attribute
:
3488 *line_number
= This
->attr
->position
.line_number
;
3490 case XmlNodeType_Whitespace
:
3491 case XmlNodeType_XmlDeclaration
:
3492 *line_number
= This
->empty_element
.position
.line_number
;
3495 *line_number
= This
->position
.line_number
;
3499 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3502 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3504 xmlreader
*This
= impl_from_IXmlReader(iface
);
3505 const struct element
*element
;
3507 TRACE("(%p %p)\n", This
, line_position
);
3510 return E_INVALIDARG
;
3512 switch (reader_get_nodetype(This
))
3514 case XmlNodeType_Element
:
3515 case XmlNodeType_EndElement
:
3516 element
= reader_get_element(This
);
3517 *line_position
= element
->position
.line_position
;
3519 case XmlNodeType_Attribute
:
3520 *line_position
= This
->attr
->position
.line_position
;
3522 case XmlNodeType_Whitespace
:
3523 case XmlNodeType_XmlDeclaration
:
3524 *line_position
= This
->empty_element
.position
.line_position
;
3527 *line_position
= This
->position
.line_position
;
3531 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3534 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3536 xmlreader
*This
= impl_from_IXmlReader(iface
);
3538 TRACE("(%p)->(%p)\n", This
, count
);
3540 if (!count
) return E_INVALIDARG
;
3542 *count
= This
->attr_count
;
3546 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3548 xmlreader
*This
= impl_from_IXmlReader(iface
);
3549 TRACE("(%p)->(%p)\n", This
, depth
);
3550 *depth
= This
->depth
;
3554 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3556 xmlreader
*This
= impl_from_IXmlReader(iface
);
3557 TRACE("(%p)\n", iface
);
3558 return This
->state
== XmlReadState_EndOfFile
;
3561 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3563 xmlreader_QueryInterface
,
3567 xmlreader_GetProperty
,
3568 xmlreader_SetProperty
,
3570 xmlreader_GetNodeType
,
3571 xmlreader_MoveToFirstAttribute
,
3572 xmlreader_MoveToNextAttribute
,
3573 xmlreader_MoveToAttributeByName
,
3574 xmlreader_MoveToElement
,
3575 xmlreader_GetQualifiedName
,
3576 xmlreader_GetNamespaceUri
,
3577 xmlreader_GetLocalName
,
3578 xmlreader_GetPrefix
,
3580 xmlreader_ReadValueChunk
,
3581 xmlreader_GetBaseUri
,
3582 xmlreader_IsDefault
,
3583 xmlreader_IsEmptyElement
,
3584 xmlreader_GetLineNumber
,
3585 xmlreader_GetLinePosition
,
3586 xmlreader_GetAttributeCount
,
3591 /** IXmlReaderInput **/
3592 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3594 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3596 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3598 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3599 IsEqualGUID(riid
, &IID_IUnknown
))
3605 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3607 return E_NOINTERFACE
;
3610 IUnknown_AddRef(iface
);
3615 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3617 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3618 ULONG ref
= InterlockedIncrement(&This
->ref
);
3619 TRACE("(%p)->(%d)\n", This
, ref
);
3623 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3625 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3626 LONG ref
= InterlockedDecrement(&This
->ref
);
3628 TRACE("(%p)->(%d)\n", This
, ref
);
3632 IMalloc
*imalloc
= This
->imalloc
;
3633 if (This
->input
) IUnknown_Release(This
->input
);
3634 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3635 if (This
->buffer
) free_input_buffer(This
->buffer
);
3636 readerinput_free(This
, This
->baseuri
);
3637 readerinput_free(This
, This
);
3638 if (imalloc
) IMalloc_Release(imalloc
);
3644 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3646 xmlreaderinput_QueryInterface
,
3647 xmlreaderinput_AddRef
,
3648 xmlreaderinput_Release
3651 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3657 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3660 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3662 reader
= heap_alloc(sizeof(*reader
));
3664 return E_OUTOFMEMORY
;
3666 memset(reader
, 0, sizeof(*reader
));
3667 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3669 reader
->state
= XmlReadState_Closed
;
3670 reader
->instate
= XmlReadInState_Initial
;
3671 reader
->resumestate
= XmlReadResumeState_Initial
;
3672 reader
->dtdmode
= DtdProcessing_Prohibit
;
3673 reader
->imalloc
= imalloc
;
3674 if (imalloc
) IMalloc_AddRef(imalloc
);
3675 reader
->nodetype
= XmlNodeType_None
;
3676 list_init(&reader
->attrs
);
3677 list_init(&reader
->nsdef
);
3678 list_init(&reader
->ns
);
3679 list_init(&reader
->elements
);
3680 reader
->max_depth
= 256;
3682 reader
->chunk_read_off
= 0;
3683 for (i
= 0; i
< StringValue_Last
; i
++)
3684 reader
->strvalues
[i
] = strval_empty
;
3686 hr
= IXmlReader_QueryInterface(&reader
->IXmlReader_iface
, riid
, obj
);
3687 IXmlReader_Release(&reader
->IXmlReader_iface
);
3689 TRACE("returning iface %p, hr %#x\n", *obj
, hr
);
3694 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3699 IXmlReaderInput
**ppInput
)
3701 xmlreaderinput
*readerinput
;
3704 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3705 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3707 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3710 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3712 readerinput
= heap_alloc(sizeof(*readerinput
));
3713 if(!readerinput
) return E_OUTOFMEMORY
;
3715 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3716 readerinput
->ref
= 1;
3717 readerinput
->imalloc
= imalloc
;
3718 readerinput
->stream
= NULL
;
3719 if (imalloc
) IMalloc_AddRef(imalloc
);
3720 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3721 readerinput
->hint
= hint
;
3722 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3723 readerinput
->pending
= 0;
3725 hr
= alloc_input_buffer(readerinput
);
3728 readerinput_free(readerinput
, readerinput
->baseuri
);
3729 readerinput_free(readerinput
, readerinput
);
3730 if (imalloc
) IMalloc_Release(imalloc
);
3733 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3735 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3737 TRACE("returning iface %p\n", *ppInput
);