2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
35 #include "wine/unicode.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
39 /* not defined in public headers */
40 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
44 XmlReadInState_Initial
,
45 XmlReadInState_XmlDecl
,
46 XmlReadInState_Misc_DTD
,
48 XmlReadInState_DTD_Misc
,
49 XmlReadInState_Element
,
50 XmlReadInState_Content
,
51 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
53 } XmlReaderInternalState
;
55 /* This state denotes where parsing was interrupted by input problem.
56 Reader resumes parsing using this information. */
59 XmlReadResumeState_Initial
,
60 XmlReadResumeState_PITarget
,
61 XmlReadResumeState_PIBody
,
62 XmlReadResumeState_CDATA
,
63 XmlReadResumeState_Comment
,
64 XmlReadResumeState_STag
,
65 XmlReadResumeState_CharData
,
66 XmlReadResumeState_Whitespace
67 } XmlReaderResumeState
;
69 /* saved pointer index to resume from particular input position */
72 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
73 XmlReadResume_Local
, /* local for QName */
74 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
80 StringValue_LocalName
,
82 StringValue_QualifiedName
,
85 } XmlReaderStringValue
;
87 static const WCHAR usasciiW
[] = {'U','S','-','A','S','C','I','I',0};
88 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
89 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
91 static const WCHAR dblquoteW
[] = {'\"',0};
92 static const WCHAR quoteW
[] = {'\'',0};
93 static const WCHAR ltW
[] = {'<',0};
94 static const WCHAR gtW
[] = {'>',0};
95 static const WCHAR commentW
[] = {'<','!','-','-',0};
96 static const WCHAR piW
[] = {'<','?',0};
98 static BOOL
is_namestartchar(WCHAR ch
);
100 static const char *debugstr_nodetype(XmlNodeType nodetype
)
102 static const char * const type_names
[] =
111 "ProcessingInstruction",
124 if (nodetype
> _XmlNodeType_Last
)
125 return wine_dbg_sprintf("unknown type=%d", nodetype
);
127 return type_names
[nodetype
];
130 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
132 static const char * const prop_names
[] =
144 if (prop
> _XmlReaderProperty_Last
)
145 return wine_dbg_sprintf("unknown property=%d", prop
);
147 return prop_names
[prop
];
150 struct xml_encoding_data
/* Table tying each supported encoding name to its xml_encoding value and the
   code page value stored for it.
   NOTE(review): parse_encoding_name() looks like it bisects this table by
   name, so rows presumably have to stay sorted case-insensitively - confirm
   before adding entries. */
static const struct xml_encoding_data xml_encoding_map[] = {
    { usasciiW, XmlEncoding_USASCII, 20127 },
    { utf16W, XmlEncoding_UTF16, ~0 },
    { utf8W, XmlEncoding_UTF8, CP_UTF8 },
};
163 const WCHAR
*get_encoding_name(xml_encoding encoding
)
165 return xml_encoding_map
[encoding
].name
;
168 xml_encoding
get_encoding_from_codepage(UINT codepage
)
171 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
173 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
175 return XmlEncoding_Unknown
;
182 unsigned int allocated
;
183 unsigned int written
;
187 typedef struct input_buffer input_buffer
;
191 IXmlReaderInput IXmlReaderInput_iface
;
193 /* reference passed on IXmlReaderInput creation, is kept when input is created */
196 xml_encoding encoding
;
199 /* stream reference set after SetInput() call from reader,
200 stored as sequential stream, cause currently
201 optimizations possible with IStream aren't implemented */
202 ISequentialStream
*stream
;
203 input_buffer
*buffer
;
204 unsigned int pending
: 1;
207 static const struct IUnknownVtbl xmlreaderinputvtbl
;
209 /* Structure to hold parsed string of specific length.
211 Reader stores node value as 'start' pointer, on request
212 a null-terminated version of it is allocated.
214 To init a strval variable use reader_init_strval(),
215 to set strval as a reader value use reader_set_strval().
219 WCHAR
*str
; /* allocated null-terminated string */
220 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
221 UINT start
; /* input position where value starts */
224 static WCHAR emptyW
[] = {0};
225 static WCHAR xmlW
[] = {'x','m','l',0};
226 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
227 static const strval strval_empty
= { emptyW
};
228 static const strval strval_xml
= { xmlW
, 3 };
229 static const strval strval_xmlns
= { xmlnsW
, 5 };
231 struct reader_position
239 ATTRIBUTE_NS_DEFINITION
= 0x1,
240 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
250 struct reader_position position
;
260 struct reader_position position
;
268 struct element
*element
;
273 IXmlReader IXmlReader_iface
;
275 xmlreaderinput
*input
;
278 HRESULT error
; /* error set on XmlReadState_Error */
279 XmlReaderInternalState instate
;
280 XmlReaderResumeState resumestate
;
281 XmlNodeType nodetype
;
282 DtdProcessing dtdmode
;
283 IXmlResolver
*resolver
;
285 struct reader_position position
;
286 struct list attrs
; /* attributes list for current node */
287 struct attribute
*attr
; /* current attribute */
291 struct list elements
;
293 strval strvalues
[StringValue_Last
];
296 BOOL is_empty_element
;
297 struct element empty_element
; /* used for empty elements without end tag <a />,
298 and to keep <?xml reader position */
299 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
304 encoded_buffer utf16
;
305 encoded_buffer encoded
;
307 xmlreaderinput
*input
;
310 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
312 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
315 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
317 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
320 /* reader memory allocation functions */
321 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
323 return m_alloc(reader
->imalloc
, len
);
326 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
328 void *ret
= reader_alloc(reader
, len
);
334 static inline void reader_free(xmlreader
*reader
, void *mem
)
336 m_free(reader
->imalloc
, mem
);
339 /* Just return pointer from offset, no attempt to read more. */
340 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
342 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
343 return (WCHAR
*)buffer
->data
+ offset
;
346 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
348 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
351 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
355 if (src
->str
!= strval_empty
.str
)
357 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
358 if (!dest
->str
) return E_OUTOFMEMORY
;
359 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
360 dest
->str
[dest
->len
] = 0;
367 /* reader input memory allocation functions */
368 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
370 return m_alloc(input
->imalloc
, len
);
373 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
375 return m_realloc(input
->imalloc
, mem
, len
);
378 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
380 m_free(input
->imalloc
, mem
);
383 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
390 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
391 ret
= readerinput_alloc(input
, size
);
392 if (ret
) memcpy(ret
, str
, size
);
398 /* This one frees stored string value if needed */
399 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
401 if (v
->str
!= strval_empty
.str
)
403 reader_free(reader
, v
->str
);
408 static void reader_clear_attrs(xmlreader
*reader
)
410 struct attribute
*attr
, *attr2
;
411 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
413 reader_free_strvalued(reader
, &attr
->localname
);
414 reader_free_strvalued(reader
, &attr
->value
);
415 reader_free(reader
, attr
);
417 list_init(&reader
->attrs
);
418 reader
->attr_count
= 0;
422 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
423 while we are on a node with attributes */
424 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
425 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
427 struct attribute
*attr
;
430 attr
= reader_alloc(reader
, sizeof(*attr
));
431 if (!attr
) return E_OUTOFMEMORY
;
433 hr
= reader_strvaldup(reader
, localname
, &attr
->localname
);
436 hr
= reader_strvaldup(reader
, value
, &attr
->value
);
438 reader_free_strvalued(reader
, &attr
->value
);
442 reader_free(reader
, attr
);
447 attr
->prefix
= *prefix
;
449 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
450 attr
->qname
= qname
? *qname
: *localname
;
451 attr
->position
= *position
;
453 list_add_tail(&reader
->attrs
, &attr
->entry
);
454 reader
->attr_count
++;
459 /* Returns current element, doesn't check if reader is actually positioned on it. */
460 static struct element
*reader_get_element(xmlreader
*reader
)
462 if (reader
->is_empty_element
)
463 return &reader
->empty_element
;
465 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
468 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
475 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
477 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
480 /* used to initialize from constant string */
481 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
488 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
490 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
493 static void reader_free_strvalues(xmlreader
*reader
)
496 for (type
= 0; type
< StringValue_Last
; type
++)
497 reader_free_strvalue(reader
, type
);
500 /* This helper should only be used to test if strings are the same,
501 it doesn't try to sort. */
502 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
504 if (str1
->len
!= str2
->len
) return 0;
505 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
508 static void reader_clear_elements(xmlreader
*reader
)
510 struct element
*elem
, *elem2
;
511 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
513 reader_free_strvalued(reader
, &elem
->prefix
);
514 reader_free_strvalued(reader
, &elem
->localname
);
515 reader_free_strvalued(reader
, &elem
->qname
);
516 reader_free(reader
, elem
);
518 list_init(&reader
->elements
);
519 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
520 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
521 reader
->is_empty_element
= FALSE
;
524 static HRESULT
reader_inc_depth(xmlreader
*reader
)
526 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
529 static void reader_dec_depth(xmlreader
*reader
)
535 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
540 ns
= reader_alloc(reader
, sizeof(*ns
));
541 if (!ns
) return E_OUTOFMEMORY
;
544 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
546 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
548 reader_free(reader
, ns
);
553 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
555 reader_free_strvalued(reader
, &ns
->prefix
);
556 reader_free(reader
, ns
);
561 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
565 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
567 reader_free_strvalued(reader
, &element
->prefix
);
568 reader_free_strvalued(reader
, &element
->localname
);
569 reader_free_strvalued(reader
, &element
->qname
);
570 reader_free(reader
, element
);
573 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
577 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
580 ns
->element
= element
;
583 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
586 ns
->element
= element
;
590 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
591 strval
*qname
, const struct reader_position
*position
)
593 struct element
*element
;
596 element
= reader_alloc_zero(reader
, sizeof(*element
));
598 return E_OUTOFMEMORY
;
600 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
601 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
602 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
604 list_add_head(&reader
->elements
, &element
->entry
);
605 reader_mark_ns_nodes(reader
, element
);
606 reader
->is_empty_element
= FALSE
;
607 element
->position
= *position
;
610 reader_free_element(reader
, element
);
615 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
619 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
620 if (ns
->element
!= element
)
623 list_remove(&ns
->entry
);
624 reader_free_strvalued(reader
, &ns
->prefix
);
625 reader_free_strvalued(reader
, &ns
->uri
);
626 reader_free(reader
, ns
);
629 if (!list_empty(&reader
->nsdef
)) {
630 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
631 if (ns
->element
== element
) {
632 list_remove(&ns
->entry
);
633 reader_free_strvalued(reader
, &ns
->prefix
);
634 reader_free_strvalued(reader
, &ns
->uri
);
635 reader_free(reader
, ns
);
640 static void reader_pop_element(xmlreader
*reader
)
642 struct element
*element
;
644 if (list_empty(&reader
->elements
))
647 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
648 list_remove(&element
->entry
);
650 reader_pop_ns_nodes(reader
, element
);
651 reader_free_element(reader
, element
);
653 /* It was a root element, the rest is expected as Misc */
654 if (list_empty(&reader
->elements
))
655 reader
->instate
= XmlReadInState_MiscEnd
;
658 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
659 means node value is to be determined. */
660 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
662 strval
*v
= &reader
->strvalues
[type
];
664 reader_free_strvalue(reader
, type
);
673 if (value
->str
== strval_empty
.str
)
677 if (type
== StringValue_Value
)
679 /* defer allocation for value string */
681 v
->start
= value
->start
;
686 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
687 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
688 v
->str
[value
->len
] = 0;
694 static inline int is_reader_pending(xmlreader
*reader
)
696 return reader
->input
->pending
;
699 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
701 const int initial_len
= 0x2000;
702 buffer
->data
= readerinput_alloc(input
, initial_len
);
703 if (!buffer
->data
) return E_OUTOFMEMORY
;
705 memset(buffer
->data
, 0, 4);
707 buffer
->allocated
= initial_len
;
709 buffer
->prev_cr
= FALSE
;
714 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
716 readerinput_free(input
, buffer
->data
);
719 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
721 if (encoding
== XmlEncoding_Unknown
)
723 FIXME("unsupported encoding %d\n", encoding
);
727 *cp
= xml_encoding_map
[encoding
].cp
;
732 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
736 if (!name
) return XmlEncoding_Unknown
;
739 max
= sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]) - 1;
746 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
748 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
750 return xml_encoding_map
[n
].enc
;
758 return XmlEncoding_Unknown
;
761 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
763 input_buffer
*buffer
;
766 input
->buffer
= NULL
;
768 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
769 if (!buffer
) return E_OUTOFMEMORY
;
771 buffer
->input
= input
;
772 buffer
->code_page
= ~0; /* code page is unknown at this point */
773 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
775 readerinput_free(input
, buffer
);
779 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
781 free_encoded_buffer(input
, &buffer
->utf16
);
782 readerinput_free(input
, buffer
);
786 input
->buffer
= buffer
;
790 static void free_input_buffer(input_buffer
*buffer
)
792 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
793 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
794 readerinput_free(buffer
->input
, buffer
);
797 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
799 if (readerinput
->stream
) {
800 ISequentialStream_Release(readerinput
->stream
);
801 readerinput
->stream
= NULL
;
805 /* Queries already stored interface for IStream/ISequentialStream.
806 Interface supplied on creation will be overwritten */
807 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
811 readerinput_release_stream(readerinput
);
812 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
814 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
819 /* reads a chunk to raw buffer */
820 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
822 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
823 /* to make sure aligned length won't exceed allocated length */
824 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
828 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
829 variable width encodings like UTF-8 */
830 len
= (len
+ 3) & ~3;
831 /* try to use allocated space or grow */
832 if (buffer
->allocated
- buffer
->written
< len
)
834 buffer
->allocated
*= 2;
835 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
836 len
= buffer
->allocated
- buffer
->written
;
840 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
841 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
842 readerinput
->pending
= hr
== E_PENDING
;
843 if (FAILED(hr
)) return hr
;
844 buffer
->written
+= read
;
849 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
850 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
852 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
854 length
*= sizeof(WCHAR
);
855 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
856 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
858 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
859 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
860 buffer
->allocated
= grown_size
;
864 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
866 static const char startA
[] = {'<','?'};
867 static const char commentA
[] = {'<','!'};
868 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
869 unsigned char *ptr
= (unsigned char*)buffer
->data
;
871 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
872 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
873 /* test start byte */
876 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
877 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
878 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
879 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
883 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
885 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
886 static const char utf8bom
[] = {0xef,0xbb,0xbf};
887 static const char utf16lebom
[] = {0xff,0xfe};
890 *enc
= XmlEncoding_Unknown
;
892 if (buffer
->written
<= 3)
894 HRESULT hr
= readerinput_growraw(readerinput
);
895 if (FAILED(hr
)) return hr
;
896 if (buffer
->written
< 3) return MX_E_INPUTEND
;
899 ptrW
= (WCHAR
*)buffer
->data
;
900 /* try start symbols if we have enough data to do that, input buffer should contain
901 first chunk already */
902 if (readerinput_is_utf8(readerinput
))
903 *enc
= XmlEncoding_UTF8
;
904 else if (*ptrW
== '<')
907 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
908 *enc
= XmlEncoding_UTF16
;
910 /* try with BOM now */
911 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
913 buffer
->cur
+= sizeof(utf8bom
);
914 *enc
= XmlEncoding_UTF8
;
916 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
918 buffer
->cur
+= sizeof(utf16lebom
);
919 *enc
= XmlEncoding_UTF16
;
925 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
927 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
928 int len
= buffer
->written
;
930 /* complete single byte char */
931 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
933 /* find start byte of multibyte char */
934 while (--len
&& !(buffer
->data
[len
] & 0xc0))
940 /* Returns byte length of complete char sequence for buffer code page,
941 it's relative to current buffer position which is currently used for BOM handling
943 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
945 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
948 if (readerinput
->buffer
->code_page
== CP_UTF8
)
949 len
= readerinput_get_utf8_convlen(readerinput
);
951 len
= buffer
->written
;
953 TRACE("%d\n", len
- buffer
->cur
);
954 return len
- buffer
->cur
;
957 /* It's possible that raw buffer has some leftovers from last conversion - some char
958 sequence that doesn't represent a full code point. Length argument should be calculated with
959 readerinput_get_convlen(), if it's -1 it will be calculated here. */
960 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
962 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
965 len
= readerinput_get_convlen(readerinput
);
967 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
968 /* everything below cur is lost too */
969 buffer
->written
-= len
+ buffer
->cur
;
970 /* after this point we don't need cur offset really,
971 it's used only to mark where actual data begins when first chunk is read */
975 static void fixup_buffer_cr(encoded_buffer
*buffer
, int off
)
977 BOOL prev_cr
= buffer
->prev_cr
;
981 src
= dest
= (WCHAR
*)buffer
->data
+ off
;
982 while ((const char*)src
< buffer
->data
+ buffer
->written
)
991 if(prev_cr
&& *src
== '\n')
998 buffer
->written
= (char*)dest
- buffer
->data
;
999 buffer
->prev_cr
= prev_cr
;
1003 /* note that raw buffer content is kept */
1004 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
1006 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1007 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1013 hr
= get_code_page(enc
, &cp
);
1014 if (FAILED(hr
)) return;
1016 readerinput
->buffer
->code_page
= cp
;
1017 len
= readerinput_get_convlen(readerinput
);
1019 TRACE("switching to cp %d\n", cp
);
1021 /* just copy in this case */
1022 if (enc
== XmlEncoding_UTF16
)
1024 readerinput_grow(readerinput
, len
);
1025 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
1026 dest
->written
+= len
*sizeof(WCHAR
);
1030 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1031 readerinput_grow(readerinput
, dest_len
);
1032 ptr
= (WCHAR
*)dest
->data
;
1033 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1035 dest
->written
+= dest_len
*sizeof(WCHAR
);
1038 fixup_buffer_cr(dest
, 0);
1041 /* shrinks parsed data a buffer begins with */
1042 static void reader_shrink(xmlreader
*reader
)
1044 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1046 /* avoid to move too often using threshold shrink length */
1047 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
1049 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
1050 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1052 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1056 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1057 It won't attempt to shrink but will grow destination buffer if needed */
1058 static HRESULT
reader_more(xmlreader
*reader
)
1060 xmlreaderinput
*readerinput
= reader
->input
;
1061 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1062 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1063 UINT cp
= readerinput
->buffer
->code_page
;
1064 int len
, dest_len
, prev_len
;
1068 /* get some raw data from stream first */
1069 hr
= readerinput_growraw(readerinput
);
1070 len
= readerinput_get_convlen(readerinput
);
1071 prev_len
= dest
->written
/ sizeof(WCHAR
);
1073 /* just copy for UTF-16 case */
1076 readerinput_grow(readerinput
, len
);
1077 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1078 dest
->written
+= len
*sizeof(WCHAR
);
1082 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1083 readerinput_grow(readerinput
, dest_len
);
1084 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1085 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1087 dest
->written
+= dest_len
*sizeof(WCHAR
);
1088 /* get rid of processed data */
1089 readerinput_shrinkraw(readerinput
, len
);
1092 fixup_buffer_cr(dest
, prev_len
);
1096 static inline UINT
reader_get_cur(xmlreader
*reader
)
1098 return reader
->input
->buffer
->utf16
.cur
;
1101 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1103 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1104 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1105 if (!*ptr
) reader_more(reader
);
1106 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1109 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1112 const WCHAR
*ptr
= reader_get_ptr(reader
);
1117 reader_more(reader
);
1118 ptr
= reader_get_ptr(reader
);
1120 if (str
[i
] != ptr
[i
])
1121 return ptr
[i
] - str
[i
];
1127 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1130 reader
->position
.line_position
= 1;
1131 else if (ch
== '\n')
1133 reader
->position
.line_number
++;
1134 reader
->position
.line_position
= 1;
1137 reader
->position
.line_position
++;
1140 /* moves cursor n WCHARs forward */
1141 static void reader_skipn(xmlreader
*reader
, int n
)
1143 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1146 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1148 reader_update_position(reader
, *ptr
);
1153 static inline BOOL
is_wchar_space(WCHAR ch
)
1155 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1158 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1159 static int reader_skipspaces(xmlreader
*reader
)
1161 const WCHAR
*ptr
= reader_get_ptr(reader
);
1162 UINT start
= reader_get_cur(reader
);
1164 while (is_wchar_space(*ptr
))
1166 reader_skipn(reader
, 1);
1167 ptr
= reader_get_ptr(reader
);
1170 return reader_get_cur(reader
) - start
;
1173 /* [26] VersionNum ::= '1.' [0-9]+ */
1174 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1176 static const WCHAR onedotW
[] = {'1','.',0};
1180 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1182 start
= reader_get_cur(reader
);
1184 reader_skipn(reader
, 2);
1186 ptr2
= ptr
= reader_get_ptr(reader
);
1187 while (*ptr
>= '0' && *ptr
<= '9')
1189 reader_skipn(reader
, 1);
1190 ptr
= reader_get_ptr(reader
);
1193 if (ptr2
== ptr
) return WC_E_DIGIT
;
1194 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1195 TRACE("version=%s\n", debug_strval(reader
, val
));
1199 /* [25] Eq ::= S? '=' S? */
1200 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1202 static const WCHAR eqW
[] = {'=',0};
1203 reader_skipspaces(reader
);
1204 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1206 reader_skipn(reader
, 1);
1207 reader_skipspaces(reader
);
1211 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1212 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1214 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1215 struct reader_position position
;
1219 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1221 position
= reader
->position
;
1222 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1223 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1224 /* skip 'version' */
1225 reader_skipn(reader
, 7);
1227 hr
= reader_parse_eq(reader
);
1228 if (FAILED(hr
)) return hr
;
1230 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1233 reader_skipn(reader
, 1);
1235 hr
= reader_parse_versionnum(reader
, &val
);
1236 if (FAILED(hr
)) return hr
;
1238 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1242 reader_skipn(reader
, 1);
1244 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1247 /* ([A-Za-z0-9._] | '-') */
1248 static inline BOOL
is_wchar_encname(WCHAR ch
)
1250 return ((ch
>= 'A' && ch
<= 'Z') ||
1251 (ch
>= 'a' && ch
<= 'z') ||
1252 (ch
>= '0' && ch
<= '9') ||
1253 (ch
== '.') || (ch
== '_') ||
1257 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1258 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1260 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1264 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1265 return WC_E_ENCNAME
;
1267 val
->start
= reader_get_cur(reader
);
1270 while (is_wchar_encname(*++ptr
))
1274 enc
= parse_encoding_name(start
, len
);
1275 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1279 if (enc
== XmlEncoding_Unknown
)
1280 return WC_E_ENCNAME
;
1282 /* skip encoding name */
1283 reader_skipn(reader
, len
);
1287 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1288 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1290 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1291 struct reader_position position
;
1295 if (!reader_skipspaces(reader
)) return S_FALSE
;
1297 position
= reader
->position
;
1298 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1299 name
.str
= reader_get_ptr(reader
);
1300 name
.start
= reader_get_cur(reader
);
1302 /* skip 'encoding' */
1303 reader_skipn(reader
, 8);
1305 hr
= reader_parse_eq(reader
);
1306 if (FAILED(hr
)) return hr
;
1308 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1311 reader_skipn(reader
, 1);
1313 hr
= reader_parse_encname(reader
, &val
);
1314 if (FAILED(hr
)) return hr
;
1316 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1320 reader_skipn(reader
, 1);
1322 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1325 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
/* Parses the optional standalone declaration of the XML declaration.
 * Returns S_FALSE when there is no leading whitespace or no keyword,
 * propagates a failing reader_parse_eq(), and returns WC_E_XMLDECL when the
 * value is neither yesW nor noW. On the visible success path the name/value
 * pair is handed to reader_add_attr() together with the position saved before
 * the keyword. */
1326 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1328 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1329 static const WCHAR yesW
[] = {'y','e','s',0};
1330 static const WCHAR noW
[] = {'n','o',0};
1331 struct reader_position position
;
1336 if (!reader_skipspaces(reader
)) return S_FALSE
;
1338 position
= reader
->position
;
1339 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1340 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1341 /* skip 'standalone' */
1342 reader_skipn(reader
, 10);
1344 hr
= reader_parse_eq(reader
);
1345 if (FAILED(hr
)) return hr
;
/* opening quote */
1347 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1350 reader_skipn(reader
, 1);
1352 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1353 return WC_E_XMLDECL
;
1355 start
= reader_get_cur(reader
);
1356 /* skip 'yes'|'no' */
/* reader_cmp() is non-zero on a mismatch; one of the two values is known to be
   present here, so a mismatch with yesW selects the 2-character value */
1357 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1358 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1359 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
/* closing quote */
1361 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1364 reader_skipn(reader
, 1);
1366 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1369 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
/* Parses the XML declaration at the start of the input.
 * After the xmldeclW check, 2 characters are consumed, the position is
 * recorded and 3 more characters are consumed. The version, encoding and
 * standalone parsers then run in that order. A missing declcloseW yields
 * WC_E_XMLDECL. On success the node type becomes XmlNodeType_XmlDeclaration,
 * the recorded position is stored in reader->empty_element.position and both
 * the local and the qualified name are set to strval_xml.
 * NOTE(review): the statements that follow the xmldeclW test and the three
 * intermediate hr assignments are not part of this extract. */
1370 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1372 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1373 static const WCHAR declcloseW
[] = {'?','>',0};
1374 struct reader_position position
;
1377 /* check if we have "<?xml " */
1378 if (reader_cmp(reader
, xmldeclW
))
/* the node position is taken after the first 2 characters of the markup */
1381 reader_skipn(reader
, 2);
1382 position
= reader
->position
;
1383 reader_skipn(reader
, 3);
1384 hr
= reader_parse_versioninfo(reader
);
1388 hr
= reader_parse_encdecl(reader
);
1392 hr
= reader_parse_sddecl(reader
);
1396 reader_skipspaces(reader
);
1397 if (reader_cmp(reader
, declcloseW
))
1398 return WC_E_XMLDECL
;
/* consume the closing markup */
1401 reader_skipn(reader
, 2);
1403 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1404 reader
->empty_element
.position
= position
;
1405 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1406 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1411 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
/* Parses a comment node; parsing can be resumed after an input refill.
 * When reader->resumestate is XmlReadResumeState_Comment the value start is
 * taken from reader->resume[XmlReadResume_Body]. Otherwise the 4-character
 * opening markup is skipped, the buffer is shrunk, and the node type, resume
 * slot and resume state are initialised with an unset value.
 * When the end of the comment is found, the text between start and the current
 * position becomes StringValue_Value, 3 characters of closing markup are
 * skipped and the resume bookkeeping is reset.
 * WC_E_COMMENT is returned on one path; its condition is on lines that are
 * missing from this extract. */
1412 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1417 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1419 start
= reader
->resume
[XmlReadResume_Body
];
1420 ptr
= reader_get_ptr(reader
);
/* first entry for this node: consume the opening markup and set up resume data */
1425 reader_skipn(reader
, 4);
1426 reader_shrink(reader
);
1427 ptr
= reader_get_ptr(reader
);
1428 start
= reader_get_cur(reader
);
1429 reader
->nodetype
= XmlNodeType_Comment
;
1430 reader
->resume
[XmlReadResume_Body
] = start
;
1431 reader
->resumestate
= XmlReadResumeState_Comment
;
1432 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1435 /* will exit when there's no more data, it won't attempt to
1436 read more from stream */
1447 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1448 TRACE("%s\n", debug_strval(reader
, &value
));
1450 /* skip rest of markup '-->' */
1451 reader_skipn(reader
, 3);
1453 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1454 reader
->resume
[XmlReadResume_Body
] = 0;
1455 reader
->resumestate
= XmlReadResumeState_Initial
;
1459 return WC_E_COMMENT
;
1463 reader_skipn(reader
, 1);
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 *
 * Checks a single UTF-16 code unit against the Char production. Characters of
 * the supplementary range arrive as surrogate pairs, so every surrogate code
 * unit is accepted on its own. */
static inline BOOL is_char(WCHAR ch)
{
    const BOOL is_ctrl_space = (ch == '\t' || ch == '\n' || ch == '\r');
    const BOOL is_low_bmp    = (ch >= 0x20 && ch <= 0xd7ff);
    const BOOL is_surrogate  = (ch >= 0xd800 && ch <= 0xdfff); /* high and low halves */
    const BOOL is_high_bmp   = (ch >= 0xe000 && ch <= 0xfffd);

    return is_ctrl_space || is_low_bmp || is_surrogate || is_high_bmp;
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Returns non-zero when ch may appear inside a PubidLiteral.
 *
 * The punctuation run has to start at the apostrophe (0x27), not at '-' (0x2d):
 * otherwise ' ( ) * + , are rejected although the production lists them. A
 * caller that parses a single-quoted literal stops at the closing quote itself,
 * so accepting the apostrophe here is safe. */
static inline BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./ digits :; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* NameStartChar test for a single UTF-16 code unit.
 * Surrogate code units are accepted individually, both the high and the low
 * half. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    /* inclusive [lo, hi] ranges */
    static const struct { WCHAR lo, hi; } ranges[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    const unsigned int count = sizeof(ranges) / sizeof(ranges[0]);
    unsigned int i;

    for (i = 0; i < count; i++)
    {
        if (ch >= ranges[i].lo && ch <= ranges[i].hi)
            break;
    }

    /* the loop stops early exactly when some range contains ch */
    return i < count;
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*)
 *
 * Returns non-zero when ch may appear inside an NCName, i.e. it is a NameChar
 * other than ':'. Surrogate code units are accepted individually.
 *
 * The middle dot #xB7 is part of the NameChar production and is accepted here
 * along with the other NameChar additions ('-', '.', digits, #x300-#x36F and
 * #x203F-#x2040). */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0' && ch <= '9') ||
           (ch == 0xb7) ||
           (ch >= 0xc0 && ch <= 0xd6) ||
           (ch >= 0xd8 && ch <= 0xf6) ||
           (ch >= 0xf8 && ch <= 0x2ff) ||
           (ch >= 0x300 && ch <= 0x36f) ||
           (ch >= 0x370 && ch <= 0x37d) ||
           (ch >= 0x37f && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1538 static inline BOOL
is_namechar(WCHAR ch
)
1540 return (ch
== ':') || is_ncnamechar(ch
);
1543 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1545 /* When we're on attribute always return attribute type, container node type is kept.
1546 Note that container is not necessarily an element, and attribute doesn't mean it's
1547 an attribute in XML spec terms. */
1548 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1551 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1552 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1553 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1554 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1555 [5] Name ::= NameStartChar (NameChar)* */
/* Parses a Name into *name.
 * A non-zero reader->resume[XmlReadResume_Name] is used as the start offset of
 * a name whose parsing is being continued. Otherwise parsing starts at the
 * current position, and WC_E_NAMECHARACTER is returned if the first character
 * fails is_namestartchar(). Characters are consumed while is_namechar() holds.
 * If the reader is pending afterwards the start offset is saved in the resume
 * slot (the value returned on that path is not visible in this extract);
 * otherwise the slot is cleared and name is initialised from the start offset
 * and the number of characters consumed. */
1556 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1561 if (reader
->resume
[XmlReadResume_Name
])
1563 start
= reader
->resume
[XmlReadResume_Name
];
1564 ptr
= reader_get_ptr(reader
);
1568 ptr
= reader_get_ptr(reader
);
1569 start
= reader_get_cur(reader
);
1570 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1573 while (is_namechar(*ptr
))
1575 reader_skipn(reader
, 1);
1576 ptr
= reader_get_ptr(reader
);
/* input ran out in the middle of the name: keep the start for the next call */
1579 if (is_reader_pending(reader
))
1581 reader
->resume
[XmlReadResume_Name
] = start
;
1585 reader
->resume
[XmlReadResume_Name
] = 0;
1587 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1588 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1593 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
/* Parses a processing instruction target.
 * A failing reader_parse_name() is reported as E_PENDING when the reader is
 * pending and as WC_E_PI otherwise. A name equal to xmlval is rejected with
 * WC_E_LEADINGXML. The characters of the name are then scanned; the return
 * visible inside that loop yields WC_E_PI at index 0 and NC_E_NAMECOLON at any
 * later index (the test guarding it is not part of this extract).
 * NOTE(review): the production excludes every case variant of the reserved
 * name while xmlval holds the lowercase spelling only; whether strval_eq()
 * compares case-insensitively cannot be told from here - confirm. */
1594 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1596 static const WCHAR xmlW
[] = {'x','m','l'};
1597 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1603 hr
= reader_parse_name(reader
, &name
);
1604 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1606 /* now that we got name check for illegal content */
1607 if (strval_eq(reader
, &name
, &xmlval
))
1608 return WC_E_LEADINGXML
;
1610 /* PITarget can't be a qualified name */
1611 ptr
= reader_get_strptr(reader
, &name
);
1612 for (i
= 0; i
< name
.len
; i
++)
1614 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1616 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1621 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
/* Parses a processing instruction; parsing can be resumed in stages.
 * XmlReadResumeState_Initial: skips the 2-character opening markup, shrinks
 * the buffer and switches to the PITarget stage.
 * XmlReadResumeState_PITarget: parses the target (failures are returned as
 * is), stores it as local and qualified name, sets an empty value and records
 * the body start in reader->resume[XmlReadResume_Body].
 * Body scanning continues from that saved offset. When the end of the
 * instruction is found, leading whitespace is stripped from the body, the
 * 2 closing characters are skipped, the node type is set to
 * XmlNodeType_ProcessingInstruction, the resume bookkeeping is reset and the
 * body becomes StringValue_Value. */
1622 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1629 switch (reader
->resumestate
)
1631 case XmlReadResumeState_Initial
:
1633 reader_skipn(reader
, 2);
1634 reader_shrink(reader
);
1635 reader
->resumestate
= XmlReadResumeState_PITarget
;
/* no break: the Initial case continues into the PITarget case */
1636 case XmlReadResumeState_PITarget
:
1637 hr
= reader_parse_pitarget(reader
, &target
);
1638 if (FAILED(hr
)) return hr
;
1639 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1640 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1641 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1642 reader
->resumestate
= XmlReadResumeState_PIBody
;
1643 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1648 start
= reader
->resume
[XmlReadResume_Body
];
1649 ptr
= reader_get_ptr(reader
);
/* cur marks the end of the body, start is moved forward past whitespace */
1656 UINT cur
= reader_get_cur(reader
);
1659 /* strip all leading whitespace chars */
1662 ptr
= reader_get_ptr2(reader
, start
);
1663 if (!is_wchar_space(*ptr
)) break;
1667 reader_init_strvalue(start
, cur
-start
, &value
);
/* consume the closing markup */
1670 reader_skipn(reader
, 2);
1671 TRACE("%s\n", debug_strval(reader
, &value
));
1672 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1673 reader
->resumestate
= XmlReadResumeState_Initial
;
1674 reader
->resume
[XmlReadResume_Body
] = 0;
1675 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1680 reader_skipn(reader
, 1);
1681 ptr
= reader_get_ptr(reader
);
1687 /* This one is used to parse significant whitespace nodes, like in Misc production */
/* Produces a Whitespace node; parsing can be resumed.
 * Initial state: the buffer is shrunk, the resume state and the body start
 * offset are recorded, the node type is set to XmlNodeType_Whitespace and the
 * local name, qualified name and value are reset to strval_empty.
 * Whitespace state: whitespace is skipped. If the reader is pending the
 * function returns S_OK without finalising the value; otherwise the skipped
 * range becomes StringValue_Value and the resume state returns to Initial.
 * NOTE(review): no statement is visible between the two case labels other than
 * the setup above, so the Initial case presumably continues into the
 * Whitespace case - one line is missing from this extract at that point. */
1688 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1690 switch (reader
->resumestate
)
1692 case XmlReadResumeState_Initial
:
1693 reader_shrink(reader
);
1694 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1695 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1696 reader
->nodetype
= XmlNodeType_Whitespace
;
1697 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1698 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1699 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1701 case XmlReadResumeState_Whitespace
:
1706 reader_skipspaces(reader
);
/* more whitespace may follow in the next chunk, keep the node open */
1707 if (is_reader_pending(reader
)) return S_OK
;
1709 start
= reader
->resume
[XmlReadResume_Body
];
1710 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1711 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1712 TRACE("%s\n", debug_strval(reader
, &value
));
1713 reader
->resumestate
= XmlReadResumeState_Initial
;
1722 /* [27] Misc ::= Comment | PI | S */
/* Parses Misc items: a comment, a processing instruction or whitespace.
 * If a node was left unfinished (resumestate is not Initial), more input is
 * requested with reader_more() and the parser matching the resume state is
 * re-entered; an unexpected resume state is logged with ERR.
 * Otherwise the current position is dispatched on: whitespace goes to
 * reader_parse_whitespace(), commentW to reader_parse_comment() and piW to
 * reader_parse_pi(). hr starts out as S_FALSE, and any other result is
 * returned right away. */
1723 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1725 HRESULT hr
= S_FALSE
;
1727 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1729 hr
= reader_more(reader
);
1730 if (FAILED(hr
)) return hr
;
1732 /* finish current node */
1733 switch (reader
->resumestate
)
1735 case XmlReadResumeState_PITarget
:
1736 case XmlReadResumeState_PIBody
:
1737 return reader_parse_pi(reader
);
1738 case XmlReadResumeState_Comment
:
1739 return reader_parse_comment(reader
);
1740 case XmlReadResumeState_Whitespace
:
1741 return reader_parse_whitespace(reader
);
1743 ERR("unknown resume state %d\n", reader
->resumestate
);
1749 const WCHAR
*cur
= reader_get_ptr(reader
);
1751 if (is_wchar_space(*cur
))
1752 hr
= reader_parse_whitespace(reader
);
1753 else if (!reader_cmp(reader
, commentW
))
1754 hr
= reader_parse_comment(reader
);
1755 else if (!reader_cmp(reader
, piW
))
1756 hr
= reader_parse_pi(reader
);
/* a node was produced or an error occurred */
1760 if (hr
!= S_FALSE
) return hr
;
1766 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
/* Parses a quoted system literal into *literal.
 * Returns WC_E_QUOTE unless the current character is a single or a double
 * quote. After one character is skipped, characters are consumed while they
 * satisfy is_char() and differ from quote; literal is initialised from the
 * consumed range, and a trailing character equal to quote is skipped.
 * NOTE(review): the assignment of quote is on a line missing from this
 * extract (presumably the opening quote character). In the visible lines a
 * missing closing quote only suppresses the final skip; whether it is reported
 * as an error cannot be told from here. */
1767 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1769 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1772 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1775 reader_skipn(reader
, 1);
1777 cur
= reader_get_ptr(reader
);
1778 start
= reader_get_cur(reader
);
1779 while (is_char(*cur
) && *cur
!= quote
)
1781 reader_skipn(reader
, 1);
1782 cur
= reader_get_ptr(reader
);
1784 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1785 if (*cur
== quote
) reader_skipn(reader
, 1);
1787 TRACE("%s\n", debug_strval(reader
, literal
));
1791 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1792 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/* Parses a quoted public identifier literal into *literal.
 * Returns WC_E_QUOTE unless the current character is a single or a double
 * quote. After one character is skipped, characters are consumed while they
 * satisfy is_pubchar() and differ from quote; literal is initialised from the
 * consumed range, and a trailing character equal to quote is skipped.
 * NOTE(review): the assignment of quote is on a line missing from this
 * extract (presumably the opening quote character). In the visible lines a
 * missing closing quote only suppresses the final skip; whether it is reported
 * as an error cannot be told from here. */
1793 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1795 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1798 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1801 reader_skipn(reader
, 1);
1803 start
= reader_get_cur(reader
);
1804 cur
= reader_get_ptr(reader
);
1805 while (is_pubchar(*cur
) && *cur
!= quote
)
1807 reader_skipn(reader
, 1);
1808 cur
= reader_get_ptr(reader
);
1810 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1811 if (*cur
== quote
) reader_skipn(reader
, 1);
1813 TRACE("%s\n", debug_strval(reader
, literal
));
1817 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
/* Parses an external identifier and stores its parts as attributes.
 * PUBLIC form: skips the 6-character keyword, requires whitespace
 * (WC_E_WHITESPACE otherwise), parses the public literal and adds it as an
 * attribute named publicW. If no whitespace follows, S_OK is returned.
 * Otherwise a system literal is attempted: its failure is not treated as an
 * error (S_OK is returned), its success adds a second attribute named systemW.
 * SYSTEM form: skips the keyword, requires whitespace, parses the system
 * literal and returns the result of reader_add_attr().
 * Every attribute is added with the position captured on entry.
 * NOTE(review): the value returned when neither keyword is present is on a
 * line missing from this extract. */
1818 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1820 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1821 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1822 struct reader_position position
= reader
->position
;
1827 if (!reader_cmp(reader
, publicW
)) {
1831 reader_skipn(reader
, 6);
1832 cnt
= reader_skipspaces(reader
);
1833 if (!cnt
) return WC_E_WHITESPACE
;
1835 hr
= reader_parse_pub_literal(reader
, &pub
);
1836 if (FAILED(hr
)) return hr
;
1838 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1839 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1840 if (FAILED(hr
)) return hr
;
/* without a separator there can be no system literal */
1842 cnt
= reader_skipspaces(reader
);
1843 if (!cnt
) return S_OK
;
1845 /* optional system id */
1846 hr
= reader_parse_sys_literal(reader
, &sys
);
1847 if (FAILED(hr
)) return S_OK
;
1849 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1850 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1851 if (FAILED(hr
)) return hr
;
1854 } else if (!reader_cmp(reader
, systemW
)) {
1856 reader_skipn(reader
, 6);
1857 cnt
= reader_skipspaces(reader
);
1858 if (!cnt
) return WC_E_WHITESPACE
;
1860 hr
= reader_parse_sys_literal(reader
, &sys
);
1861 if (FAILED(hr
)) return hr
;
1863 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1864 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1870 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
/* Parses a document type declaration.
 * Returns S_FALSE when the input does not start with doctypeW, and
 * WC_E_DTDPROHIBITED when reader->dtdmode is DtdProcessing_Prohibit.
 * Otherwise the 9-character keyword is skipped, whitespace is required
 * (WC_E_WHITESPACE), the name is parsed (any failure is reported as
 * WC_E_DECLDOCTYPE) and the external identifier parser runs, with its failures
 * propagated. Internal subset parsing is not implemented (see the FIXME).
 * On the visible success path one more character is skipped, the node type is
 * set to XmlNodeType_DocumentType and the name is stored as both local and
 * qualified name. */
1871 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1873 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1878 /* check if we have "<!DOCTYPE" */
1879 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1880 reader_shrink(reader
);
1882 /* DTD processing is not allowed by default */
1883 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1885 reader_skipn(reader
, 9);
1886 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1889 hr
= reader_parse_name(reader
, &name
);
1890 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1892 reader_skipspaces(reader
);
1894 hr
= reader_parse_externalid(reader
);
1895 if (FAILED(hr
)) return hr
;
1897 reader_skipspaces(reader
);
1899 cur
= reader_get_ptr(reader
);
1902 FIXME("internal subset parsing not implemented\n");
1907 reader_skipn(reader
, 1);
1909 reader
->nodetype
= XmlNodeType_DocumentType
;
1910 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1911 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1916 /* [11 NS] LocalPart ::= NCName */
/* Parses the local part of a qualified name into *local.
 * Continues from reader->resume[XmlReadResume_Local] when that slot is set,
 * otherwise starts at the current position. Characters are consumed while
 * is_ncnamechar() holds. With check_for_separator set, a colon right after the
 * consumed characters yields NC_E_QNAMECOLON. If the reader is pending the
 * start offset is kept in the resume slot (the value returned on that path is
 * not visible in this extract); otherwise the slot is cleared and local is
 * initialised from the start offset and the number of characters consumed. */
1917 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1922 if (reader
->resume
[XmlReadResume_Local
])
1924 start
= reader
->resume
[XmlReadResume_Local
];
1925 ptr
= reader_get_ptr(reader
);
1929 ptr
= reader_get_ptr(reader
);
1930 start
= reader_get_cur(reader
);
1933 while (is_ncnamechar(*ptr
))
1935 reader_skipn(reader
, 1);
1936 ptr
= reader_get_ptr(reader
);
/* a second separator inside a qualified name is an error */
1939 if (check_for_separator
&& *ptr
== ':')
1940 return NC_E_QNAMECOLON
;
1942 if (is_reader_pending(reader
))
1944 reader
->resume
[XmlReadResume_Local
] = start
;
1948 reader
->resume
[XmlReadResume_Local
] = 0;
1950 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1955 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1956 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1957 [9 NS] UnprefixedName ::= LocalPart
1958 [10 NS] Prefix ::= NCName */
/* Parses a qualified name into *prefix, *local and *qname.
 * reader->resume[XmlReadResume_Name] holds the start offset of the whole name
 * while parsing may still have to be resumed. On a fresh start, a first
 * character failing is_ncnamechar() yields NC_E_QNAMECHARACTER.
 * When reader->resume[XmlReadResume_Local] is set, the local part is being
 * continued: reader_parse_local() is called without the separator check and a
 * string value is initialised from the two saved offsets. Otherwise the
 * leading NCName is consumed and E_PENDING is returned if the reader is
 * pending.
 * The remaining visible lines build either a prefix plus a local part (one
 * character is skipped and reader_parse_local() runs with the separator check)
 * or a local part alone with an empty prefix, then initialise a string value
 * spanning the prefix, the separator and the local part, and clear both resume
 * slots.
 * NOTE(review): the conditions selecting between these paths are on lines
 * missing from this extract. */
1959 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1965 if (reader
->resume
[XmlReadResume_Name
])
1967 start
= reader
->resume
[XmlReadResume_Name
];
1968 ptr
= reader_get_ptr(reader
);
1972 ptr
= reader_get_ptr(reader
);
1973 start
= reader_get_cur(reader
);
1974 reader
->resume
[XmlReadResume_Name
] = start
;
1975 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1978 if (reader
->resume
[XmlReadResume_Local
])
1980 hr
= reader_parse_local(reader
, local
, FALSE
);
1981 if (FAILED(hr
)) return hr
;
/* the length excludes the one character between the name start and the local part */
1983 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1984 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1989 /* skip prefix part */
1990 while (is_ncnamechar(*ptr
))
1992 reader_skipn(reader
, 1);
1993 ptr
= reader_get_ptr(reader
);
1996 if (is_reader_pending(reader
)) return E_PENDING
;
1998 /* got a qualified name */
2001 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
2004 reader_skipn(reader
, 1);
2005 hr
= reader_parse_local(reader
, local
, TRUE
);
2006 if (FAILED(hr
)) return hr
;
2010 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
2011 reader_init_strvalue(0, 0, prefix
);
2016 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
2018 TRACE("ncname %s\n", debug_strval(reader
, local
));
2020 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
2022 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
2025 reader
->resume
[XmlReadResume_Name
] = 0;
2026 reader
->resume
[XmlReadResume_Local
] = 0;
2031 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2033 static const WCHAR entltW
[] = {'l','t'};
2034 static const WCHAR entgtW
[] = {'g','t'};
2035 static const WCHAR entampW
[] = {'a','m','p'};
2036 static const WCHAR entaposW
[] = {'a','p','o','s'};
2037 static const WCHAR entquotW
[] = {'q','u','o','t'};
2038 static const strval lt
= { (WCHAR
*)entltW
, 2 };
2039 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
2040 static const strval amp
= { (WCHAR
*)entampW
, 3 };
2041 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
2042 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
2043 WCHAR
*str
= reader_get_strptr(reader
, name
);
2048 if (strval_eq(reader
, name
, <
)) return '<';
2051 if (strval_eq(reader
, name
, >
)) return '>';
2054 if (strval_eq(reader
, name
, &
))
2056 else if (strval_eq(reader
, name
, &apos
))
2060 if (strval_eq(reader
, name
, "
)) return '\"';
2069 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2070 [67] Reference ::= EntityRef | CharRef
2071 [68] EntityRef ::= '&' Name ';' */
/* Parses a reference starting at the current position and collapses it in the
 * UTF-16 buffer.
 * Character references accumulate their value in ch from hexadecimal or
 * decimal digits. A character that is neither a digit nor the expected
 * terminator yields WC_E_SEMICOLON when ch is already non-zero and
 * WC_E_HEXDIGIT / WC_E_DIGIT otherwise. A value failing is_char() yields
 * WC_E_XMLCHARACTER, and whitespace values are normalised to a space.
 * Entity references parse a Name (failures are propagated), require a
 * semicolon (WC_E_SEMICOLON) and look the name up with
 * get_predefined_entity(); the undeclared-entity path ends in
 * WC_E_UNDECLAREDENTITY.
 * In both forms the buffer tail is moved down with memmove() so that the
 * reference occupies a single WCHAR, and buffer->written and buffer->cur are
 * adjusted.
 * NOTE(review): the declaration of ch is not part of this extract, so the
 * range it can hold for long digit sequences should be confirmed. */
2072 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2074 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2075 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2076 UINT cur
= reader_get_cur(reader
);
2081 reader_skipn(reader
, 1);
2082 ptr
= reader_get_ptr(reader
);
2086 reader_skipn(reader
, 1);
2087 ptr
= reader_get_ptr(reader
);
2089 /* hex char or decimal */
2092 reader_skipn(reader
, 1);
2093 ptr
= reader_get_ptr(reader
);
2097 if ((*ptr
>= '0' && *ptr
<= '9'))
2098 ch
= ch
*16 + *ptr
- '0';
2099 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2100 ch
= ch
*16 + *ptr
- 'a' + 10;
2101 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2102 ch
= ch
*16 + *ptr
- 'A' + 10;
2104 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2105 reader_skipn(reader
, 1);
2106 ptr
= reader_get_ptr(reader
);
2113 if ((*ptr
>= '0' && *ptr
<= '9'))
2115 ch
= ch
*10 + *ptr
- '0';
2116 reader_skipn(reader
, 1);
2117 ptr
= reader_get_ptr(reader
);
2120 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2124 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2127 if (is_wchar_space(ch
)) ch
= ' ';
2129 ptr
= reader_get_ptr(reader
);
2130 start
= reader_get_ptr2(reader
, cur
);
/* NOTE(review): this byte count is sizeof(WCHAR) larger than the one used for
   the same kind of move in the entity branch below - confirm which is intended */
2131 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2132 memmove(start
+ 1, ptr
+ 1, len
);
2134 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2135 buffer
->cur
= cur
+ 1;
2144 hr
= reader_parse_name(reader
, &name
);
2145 if (FAILED(hr
)) return hr
;
2147 ptr
= reader_get_ptr(reader
);
2148 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2150 /* predefined entities resolve to a single character */
2151 ch
= get_predefined_entity(reader
, &name
);
2154 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2155 memmove(start
+1, ptr
+1, len
);
2156 buffer
->cur
= cur
+ 1;
2157 buffer
->written
-= (ptr
- start
) * sizeof(WCHAR
);
2163 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2164 return WC_E_UNDECLAREDENTITY
;
2172 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
/* Parses a quoted attribute value into *value.
 * Returns WC_E_QUOTE when quote is neither a double nor a single quote, and
 * WC_E_LESSTHAN when a less-than sign is met inside the value. References are
 * handled by reader_parse_reference(), whose failures are propagated. Other
 * whitespace characters are overwritten with a space directly in the buffer.
 * At the end of the value, value is initialised from the start offset taken
 * after the opening quote and the number of characters consumed, and the
 * closing quote is skipped.
 * NOTE(review): the assignment of quote and the tests that select between the
 * visible paths are on lines missing from this extract. */
2173 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2178 ptr
= reader_get_ptr(reader
);
2180 /* skip opening quote */
2182 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2183 reader_skipn(reader
, 1);
2185 ptr
= reader_get_ptr(reader
);
2186 start
= reader_get_cur(reader
);
2189 if (*ptr
== '<') return WC_E_LESSTHAN
;
2193 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2194 /* skip closing quote */
2195 reader_skipn(reader
, 1);
2201 HRESULT hr
= reader_parse_reference(reader
);
2202 if (FAILED(hr
)) return hr
;
2206 /* replace all whitespace chars with ' ' */
2207 if (is_wchar_space(*ptr
)) *ptr
= ' ';
2208 reader_skipn(reader
, 1);
2210 ptr
= reader_get_ptr(reader
);
2216 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2217 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2218 [3 NS] DefaultAttName ::= 'xmlns'
2219 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2220 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2222 struct reader_position position
= reader
->position
;
2223 strval prefix
, local
, qname
, value
;
2224 enum attribute_flags flags
= 0;
2227 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2228 if (FAILED(hr
)) return hr
;
2230 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2231 flags
|= ATTRIBUTE_NS_DEFINITION
;
2233 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2234 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2236 hr
= reader_parse_eq(reader
);
2237 if (FAILED(hr
)) return hr
;
2239 hr
= reader_parse_attvalue(reader
, &value
);
2240 if (FAILED(hr
)) return hr
;
2242 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2243 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2245 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2246 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2249 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2250 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
/* Parses a start tag or an empty-element tag, beginning at its name.
 * The qualified name is parsed first, with failures propagated. Then, after
 * optional whitespace:
 *  - endW marks an empty element: reader->is_empty_element is set, the
 *    2 characters are skipped and reader->empty_element is refreshed with the
 *    prefix, duplicated qualified and local names, the position captured on
 *    entry and its namespace nodes;
 *  - gtW ends a start tag: it is skipped and the element is pushed with
 *    reader_push_element(), whose result is returned;
 *  - otherwise an attribute is parsed, with failures propagated.
 * NOTE(review): the loop construct around these steps is on lines missing
 * from this extract. */
2251 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2253 struct reader_position position
= reader
->position
;
2256 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2257 if (FAILED(hr
)) return hr
;
2261 static const WCHAR endW
[] = {'/','>',0};
2263 reader_skipspaces(reader
);
2266 if ((reader
->is_empty_element
= !reader_cmp(reader
, endW
)))
2268 struct element
*element
= &reader
->empty_element
;
2271 reader_skipn(reader
, 2);
/* release the names kept from the previous empty element */
2273 reader_free_strvalued(reader
, &element
->qname
);
2274 reader_free_strvalued(reader
, &element
->localname
);
2276 element
->prefix
= *prefix
;
2277 reader_strvaldup(reader
, qname
, &element
->qname
);
2278 reader_strvaldup(reader
, local
, &element
->localname
);
2279 element
->position
= position
;
2280 reader_mark_ns_nodes(reader
, element
);
2284 /* got a start tag */
2285 if (!reader_cmp(reader
, gtW
))
2288 reader_skipn(reader
, 1);
2289 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2292 hr
= reader_parse_attribute(reader
);
2293 if (FAILED(hr
)) return hr
;
2299 /* [39] element ::= EmptyElemTag | STag content ETag */
/* Parses the start of an element; parsing can be resumed.
 * Initial state: returns S_FALSE unless the input is at ltW; otherwise one
 * character is skipped, the buffer is shrunk and the resume state becomes
 * XmlReadResumeState_STag.
 * STag state: reader_parse_stag() handles start tags and empty elements alike,
 * with failures propagated. An empty element with an empty element stack moves
 * the reader to XmlReadInState_MiscEnd; the other visible assignment selects
 * XmlReadInState_Content (presumably the else branch, which is on a line
 * missing from this extract). Finally the node type becomes
 * XmlNodeType_Element, the resume state is reset, and the prefix, the
 * qualified name and an empty value are stored. */
2300 static HRESULT
reader_parse_element(xmlreader
*reader
)
2304 switch (reader
->resumestate
)
2306 case XmlReadResumeState_Initial
:
2307 /* check if we are really on element */
2308 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2311 reader_skipn(reader
, 1);
2313 reader_shrink(reader
);
2314 reader
->resumestate
= XmlReadResumeState_STag
;
/* no break: the Initial case continues into the STag case */
2315 case XmlReadResumeState_STag
:
2317 strval qname
, prefix
, local
;
2319 /* this handles empty elements too */
2320 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2321 if (FAILED(hr
)) return hr
;
2323 /* FIXME: need to check for defined namespace to reject invalid prefix */
2325 /* if we got empty element and stack is empty go straight to Misc */
2326 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2327 reader
->instate
= XmlReadInState_MiscEnd
;
2329 reader
->instate
= XmlReadInState_Content
;
2331 reader
->nodetype
= XmlNodeType_Element
;
2332 reader
->resumestate
= XmlReadResumeState_Initial
;
2333 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2334 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2335 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2345 /* [13 NS] ETag ::= '</' QName S? '>' */
/* Parses an end tag.
 * Skips the 2-character opening markup, records the position, parses the
 * qualified name (failures propagated), allows trailing whitespace and
 * requires gtW (WC_E_GREATERTHAN otherwise), which is then skipped. The parsed
 * name must equal the qualified name of the element at the head of
 * reader->elements, else WC_E_ELEMENTMATCH is returned. On success the stored
 * position of that element is replaced by the end tag position, the node type
 * becomes XmlNodeType_EndElement, is_empty_element is cleared and the prefix
 * is stored. */
2346 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2348 struct reader_position position
;
2349 strval prefix
, local
, qname
;
2350 struct element
*element
;
2354 reader_skipn(reader
, 2);
2356 position
= reader
->position
;
2357 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2358 if (FAILED(hr
)) return hr
;
2360 reader_skipspaces(reader
);
2362 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2365 reader_skipn(reader
, 1);
2367 /* Element stack should never be empty at this point, cause we shouldn't get to
2368 content parsing if it's empty. */
2369 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2370 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2372 /* update position stored for start tag, we won't be using it */
2373 element
->position
= position
;
2375 reader
->nodetype
= XmlNodeType_EndElement
;
2376 reader
->is_empty_element
= FALSE
;
2377 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2382 /* [18] CDSect ::= CDStart CData CDEnd
2383 [19] CDStart ::= '<![CDATA['
2384 [20] CData ::= (Char* - (Char* ']]>' Char*))
2385 [21] CDEnd ::= ']]>' */
/* Parses a CDATA section; parsing can be resumed.
 * When reader->resumestate is XmlReadResumeState_CDATA the content start comes
 * from reader->resume[XmlReadResume_Body]. Otherwise the 9-character opening
 * markup is skipped, the buffer is shrunk, and the node type, resume slot and
 * resume state are initialised with an unset value.
 * When the closing sequence is found, the text between start and the current
 * position becomes StringValue_Value, the 3 closing characters are skipped
 * and the resume bookkeeping is reset. The final single-character skip
 * presumably advances over ordinary content; its surrounding lines are missing
 * from this extract.
 * NOTE(review): the closing-sequence test reads up to two characters past ptr;
 * confirm the buffer guarantees they are readable at the end of the data. */
2386 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2391 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2393 start
= reader
->resume
[XmlReadResume_Body
];
2394 ptr
= reader_get_ptr(reader
);
2398 /* skip markup '<![CDATA[' */
2399 reader_skipn(reader
, 9);
2400 reader_shrink(reader
);
2401 ptr
= reader_get_ptr(reader
);
2402 start
= reader_get_cur(reader
);
2403 reader
->nodetype
= XmlNodeType_CDATA
;
2404 reader
->resume
[XmlReadResume_Body
] = start
;
2405 reader
->resumestate
= XmlReadResumeState_CDATA
;
2406 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2411 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2415 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
/* consume the closing sequence */
2418 reader_skipn(reader
, 3);
2419 TRACE("%s\n", debug_strval(reader
, &value
));
2421 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2422 reader
->resume
[XmlReadResume_Body
] = 0;
2423 reader
->resumestate
= XmlReadResumeState_Initial
;
2428 reader_skipn(reader
, 1);
2436 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
/* Parses character data between markup; parsing can be resumed.
 * When reader->resumestate is XmlReadResumeState_CharData the text start comes
 * from reader->resume[XmlReadResume_Body]. Otherwise the buffer is shrunk and
 * S_OK is returned right away if the input is empty or starts with a less-than
 * sign; if not, the node type is preset to Whitespace or Text depending on the
 * first character and the resume data is initialised with an unset value.
 * While scanning, the CDATA closing sequence yields WC_E_CDSECTEND, the first
 * non-whitespace character switches the node type to XmlNodeType_Text, an
 * ampersand is handed to reader_parse_reference(), and a separate visible
 * statement skips a single character (presumably for everything else; the
 * branch keyword between the two is not in this extract).
 * When the next markup is found, the position captured before the scan is
 * stored in reader->empty_element.position, the scanned range becomes
 * StringValue_Value and the resume bookkeeping is reset. */
2437 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2439 struct reader_position position
;
2443 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2445 start
= reader
->resume
[XmlReadResume_Body
];
2446 ptr
= reader_get_ptr(reader
);
2450 reader_shrink(reader
);
2451 ptr
= reader_get_ptr(reader
);
2452 start
= reader_get_cur(reader
);
2453 /* There's no text */
2454 if (!*ptr
|| *ptr
== '<') return S_OK
;
2455 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2456 reader
->resume
[XmlReadResume_Body
] = start
;
2457 reader
->resumestate
= XmlReadResumeState_CharData
;
2458 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2461 position
= reader
->position
;
2464 static const WCHAR ampW
[] = {'&',0};
2466 /* CDATA closing sequence ']]>' is not allowed */
2467 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2468 return WC_E_CDSECTEND
;
2470 /* Found next markup part */
2475 reader
->empty_element
.position
= position
;
2476 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2477 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2478 reader
->resume
[XmlReadResume_Body
] = 0;
2479 reader
->resumestate
= XmlReadResumeState_Initial
;
2483 /* this covers a case when text has leading whitespace chars */
2484 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
/* NOTE(review): the HRESULT of reader_parse_reference() is discarded here,
   unlike in the attribute value parser - confirm this is intended */
2486 if (!reader_cmp(reader
, ampW
))
2487 reader_parse_reference(reader
);
2489 reader_skipn(reader
, 1);
2491 ptr
= reader_get_ptr(reader
);
2497 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2498 static HRESULT
reader_parse_content(xmlreader
*reader
)
2500 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2501 static const WCHAR etagW
[] = {'<','/',0};
2503 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2505 switch (reader
->resumestate
)
2507 case XmlReadResumeState_CDATA
:
2508 return reader_parse_cdata(reader
);
2509 case XmlReadResumeState_Comment
:
2510 return reader_parse_comment(reader
);
2511 case XmlReadResumeState_PIBody
:
2512 case XmlReadResumeState_PITarget
:
2513 return reader_parse_pi(reader
);
2514 case XmlReadResumeState_CharData
:
2515 return reader_parse_chardata(reader
);
2517 ERR("unknown resume state %d\n", reader
->resumestate
);
2521 reader_shrink(reader
);
2523 /* handle end tag here, it indicates end of content as well */
2524 if (!reader_cmp(reader
, etagW
))
2525 return reader_parse_endtag(reader
);
2527 if (!reader_cmp(reader
, commentW
))
2528 return reader_parse_comment(reader
);
2530 if (!reader_cmp(reader
, piW
))
2531 return reader_parse_pi(reader
);
2533 if (!reader_cmp(reader
, cdstartW
))
2534 return reader_parse_cdata(reader
);
2536 if (!reader_cmp(reader
, ltW
))
2537 return reader_parse_element(reader
);
2539 /* what's left must be CharData */
2540 return reader_parse_chardata(reader
);
/* Advances the reader to the next node.
 * Unless the reader is pending, chunk_read_off is reset and the attributes are
 * cleared. Depth and namespace bookkeeping is then updated according to the
 * node type obtained on entry: an attribute decrements the depth, an element
 * either pops the namespace nodes of an empty element or increments the depth,
 * and an end element pops the element and decrements the depth.
 * After that the internal state selects the work:
 *  - Initial: grows the raw input, resets the position to line 1 / position 1,
 *    detects the encoding and switches to it, parses the XML declaration,
 *    shrinks the raw input and moves to Misc_DTD, returning at once when the
 *    declaration parser returned S_OK;
 *  - Misc_DTD, DTD, DTD_Misc: run the Misc and DTD parsers (failures are
 *    returned) and advance the state towards Element;
 *  - Element: returns reader_parse_element();
 *  - Content: returns reader_parse_content();
 *  - MiscEnd: runs the Misc parser and returns any result other than S_FALSE;
 *    otherwise leftover input is reported with WARN and the reader switches to
 *    the Eof state with XmlReadState_EndOfFile and node type None.
 * An unhandled state is reported with FIXME.
 * NOTE(review): the conditions between several of the visible state
 * assignments are on lines missing from this extract. */
2543 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2545 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2548 if (!is_reader_pending(reader
))
2550 reader
->chunk_read_off
= 0;
2551 reader_clear_attrs(reader
);
2554 /* When moving from EndElement or empty element, pop its own namespace definitions */
2557 case XmlNodeType_Attribute
:
2558 reader_dec_depth(reader
);
2560 case XmlNodeType_Element
:
2561 if (reader
->is_empty_element
)
2562 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2563 else if (FAILED(hr
= reader_inc_depth(reader
)))
2566 case XmlNodeType_EndElement
:
2567 reader_pop_element(reader
);
2568 reader_dec_depth(reader
);
2576 switch (reader
->instate
)
2578 /* if it's a first call for a new input we need to detect stream encoding */
2579 case XmlReadInState_Initial
:
2583 hr
= readerinput_growraw(reader
->input
);
2584 if (FAILED(hr
)) return hr
;
2586 reader
->position
.line_number
= 1;
2587 reader
->position
.line_position
= 1;
2589 /* try to detect encoding by BOM or data and set input code page */
2590 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2591 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2592 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2593 if (FAILED(hr
)) return hr
;
2595 /* always switch first time cause we have to put something in */
2596 readerinput_switchencoding(reader
->input
, enc
);
2598 /* parse xml declaration */
2599 hr
= reader_parse_xmldecl(reader
);
2600 if (FAILED(hr
)) return hr
;
2602 readerinput_shrinkraw(reader
->input
, -1);
2603 reader
->instate
= XmlReadInState_Misc_DTD
;
/* a declaration node was produced, report it before parsing further */
2604 if (hr
== S_OK
) return hr
;
2607 case XmlReadInState_Misc_DTD
:
2608 hr
= reader_parse_misc(reader
);
2609 if (FAILED(hr
)) return hr
;
2612 reader
->instate
= XmlReadInState_DTD
;
2616 case XmlReadInState_DTD
:
2617 hr
= reader_parse_dtd(reader
);
2618 if (FAILED(hr
)) return hr
;
2622 reader
->instate
= XmlReadInState_DTD_Misc
;
2626 reader
->instate
= XmlReadInState_Element
;
2628 case XmlReadInState_DTD_Misc
:
2629 hr
= reader_parse_misc(reader
);
2630 if (FAILED(hr
)) return hr
;
2633 reader
->instate
= XmlReadInState_Element
;
2637 case XmlReadInState_Element
:
2638 return reader_parse_element(reader
);
2639 case XmlReadInState_Content
:
2640 return reader_parse_content(reader
);
2641 case XmlReadInState_MiscEnd
:
2642 hr
= reader_parse_misc(reader
);
2643 if (hr
!= S_FALSE
) return hr
;
2645 if (*reader_get_ptr(reader
))
2647 WARN("found garbage in the end of XML\n");
2651 reader
->instate
= XmlReadInState_Eof
;
2652 reader
->state
= XmlReadState_EndOfFile
;
2653 reader
->nodetype
= XmlNodeType_None
;
2655 case XmlReadInState_Eof
:
2658 FIXME("internal state %d not handled\n", reader
->instate
);
/* IXmlReader::QueryInterface.
   riid is compared against IID_IUnknown and IID_IXmlReader.  The visible
   failure path logs a FIXME and returns E_NOINTERFACE; the visible success
   path takes a reference through IXmlReader_AddRef().
   NOTE(review): the lines storing into *ppvObject are not part of this listing. */
2666 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2668 xmlreader
*This
= impl_from_IXmlReader(iface
);
2670 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2672 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2673 IsEqualGUID(riid
, &IID_IXmlReader
))
2679 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2681 return E_NOINTERFACE
;
2684 IXmlReader_AddRef(iface
);
/* IXmlReader::AddRef: atomically increments the reader reference count with
   InterlockedIncrement() and traces the new value.
   NOTE(review): the return statement is not part of this listing. */
2689 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2691 xmlreader
*This
= impl_from_IXmlReader(iface
);
2692 ULONG ref
= InterlockedIncrement(&This
->ref
);
2693 TRACE("(%p)->(%d)\n", This
, ref
);
/* Releases all namespace bookkeeping of the reader.
   Every entry of reader->ns is unlinked, its prefix and uri strings are freed
   and then the node itself; every entry of reader->nsdef is unlinked, its uri
   string is freed and then the node itself. */
2697 static void reader_clear_ns(xmlreader
*reader
)
2699 struct ns
*ns
, *ns2
;
/* The _SAFE iteration macro is used because entries are removed while walking. */
2701 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2702 list_remove(&ns
->entry
);
2703 reader_free_strvalued(reader
, &ns
->prefix
);
2704 reader_free_strvalued(reader
, &ns
->uri
);
2705 reader_free(reader
, ns
);
/* Entries of the nsdef list only have their uri string freed here. */
2708 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2709 list_remove(&ns
->entry
);
2710 reader_free_strvalued(reader
, &ns
->uri
);
2711 reader_free(reader
, ns
);
/* Returns the parser to a clean state: zeroes the line/column position,
   clears elements, attributes, namespaces and string values, and restores
   nodetype, resumestate, the resume[] array and is_empty_element.
   NOTE(review): two original lines between the clearing calls and the
   nodetype assignment are not part of this listing. */
2715 static void reader_reset_parser(xmlreader
*reader
)
2717 reader
->position
.line_number
= 0;
2718 reader
->position
.line_position
= 0;
2720 reader_clear_elements(reader
);
2721 reader_clear_attrs(reader
);
2722 reader_clear_ns(reader
);
2723 reader_free_strvalues(reader
);
2726 reader
->nodetype
= XmlNodeType_None
;
2727 reader
->resumestate
= XmlReadResumeState_Initial
;
2728 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2729 reader
->is_empty_element
= FALSE
;
/* IXmlReader::Release: atomically decrements the reference count and traces it.
   The visible teardown sequence resets the parser, releases the input, the
   resolver and the mlang object when they are set, frees the reader itself
   and finally releases the allocator.
   NOTE(review): the condition guarding the teardown and the return statement
   are not part of this listing (presumably "ref == 0" and "return ref"). */
2732 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2734 xmlreader
*This
= impl_from_IXmlReader(iface
);
2735 LONG ref
= InterlockedDecrement(&This
->ref
);
2737 TRACE("(%p)->(%d)\n", This
, ref
);
/* The allocator pointer is copied to a local because the reader memory is
   freed before the allocator is released. */
2741 IMalloc
*imalloc
= This
->imalloc
;
2742 reader_reset_parser(This
);
2743 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2744 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2745 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2746 reader_free(This
, This
);
2747 if (imalloc
) IMalloc_Release(imalloc
);
/* IXmlReader::SetInput: replaces the reader input.
   Visible steps: the stream of the current input is released together with
   the input object, the parser is reset, then the supplied object is queried
   for IID_IXmlReaderInput.  Only the built-in implementation (recognised by
   its vtable pointer) is accepted directly; otherwise a new input object is
   created around the supplied interface with
   CreateXmlReaderInputWithEncodingName().  Finally the input is asked for a
   stream and the reader states are set back to Initial.
   NOTE(review): several guarding conditions and return statements are not
   part of this listing; comments describe visible statements only. */
2753 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2755 xmlreader
*This
= impl_from_IXmlReader(iface
);
2756 IXmlReaderInput
*readerinput
;
2759 TRACE("(%p)->(%p)\n", This
, input
);
/* Drop the previous input: first its stream, then the input object reference. */
2763 readerinput_release_stream(This
->input
);
2764 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2768 reader_reset_parser(This
);
2770 /* just reset current input */
2773 This
->state
= XmlReadState_Initial
;
2777 /* now try IXmlReaderInput, ISequentialStream, IStream */
2778 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
/* Only the implementation from this file may be downcast with impl_from_IXmlReaderInput(). */
2781 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2782 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2785 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2786 readerinput
, readerinput
->lpVtbl
);
2787 IUnknown_Release(readerinput
);
2793 if (hr
!= S_OK
|| !readerinput
)
2795 /* create IXmlReaderInput basing on supplied interface */
2796 hr
= CreateXmlReaderInputWithEncodingName(input
,
2797 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2798 if (hr
!= S_OK
) return hr
;
2799 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2802 /* set stream for supplied IXmlReaderInput */
2803 hr
= readerinput_query_for_stream(This
->input
);
2806 This
->state
= XmlReadState_Initial
;
2807 This
->instate
= XmlReadInState_Initial
;
/* IXmlReader::GetProperty: stores the requested property into *value.
   Returns E_INVALIDARG when value is NULL.  Visible cases: MultiLanguage and
   XmlResolver return the stored interface pointer and take a reference on it;
   DtdProcessing, ReadState and MaxElementDepth return the dtdmode, state and
   max_depth fields.  Any other property is logged with a FIXME.
   NOTE(review): the AddRef calls may be guarded by NULL checks on lines that
   are not part of this listing; the return values are not visible either. */
2812 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2814 xmlreader
*This
= impl_from_IXmlReader(iface
);
2816 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2818 if (!value
) return E_INVALIDARG
;
2822 case XmlReaderProperty_MultiLanguage
:
2823 *value
= (LONG_PTR
)This
->mlang
;
2825 IUnknown_AddRef(This
->mlang
);
2827 case XmlReaderProperty_XmlResolver
:
2828 *value
= (LONG_PTR
)This
->resolver
;
2830 IXmlResolver_AddRef(This
->resolver
);
2832 case XmlReaderProperty_DtdProcessing
:
2833 *value
= This
->dtdmode
;
2835 case XmlReaderProperty_ReadState
:
2836 *value
= This
->state
;
2838 case XmlReaderProperty_MaxElementDepth
:
2839 *value
= This
->max_depth
;
2842 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::SetProperty: updates one reader property from value.
   Visible cases: MultiLanguage and XmlResolver release the stored interface,
   store the new pointer and take a reference on it; DtdProcessing rejects
   values outside 0.._DtdProcessing_Last with E_INVALIDARG before storing
   dtdmode; MaxElementDepth stores max_depth.  Any other property is logged
   with a FIXME.
   NOTE(review): the Release/AddRef calls may be guarded by NULL checks on
   lines that are not part of this listing. */
2849 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2851 xmlreader
*This
= impl_from_IXmlReader(iface
);
2853 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2857 case XmlReaderProperty_MultiLanguage
:
2859 IUnknown_Release(This
->mlang
);
2860 This
->mlang
= (IUnknown
*)value
;
2862 IUnknown_AddRef(This
->mlang
);
/* The object is stored and referenced, but the FIXME signals it is not used yet. */
2864 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2866 case XmlReaderProperty_XmlResolver
:
2868 IXmlResolver_Release(This
->resolver
);
2869 This
->resolver
= (IXmlResolver
*)value
;
2871 IXmlResolver_AddRef(This
->resolver
);
2873 case XmlReaderProperty_DtdProcessing
:
2874 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2875 This
->dtdmode
= value
;
2877 case XmlReaderProperty_MaxElementDepth
:
2878 This
->max_depth
= value
;
2881 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::Read: advances to the next node and reports its type.
   The public state selects the action; the Closed and Error states have
   their own cases, whose bodies are not part of this listing.  The parsing
   path calls reader_parse_nextnode(); on success, when the previous node type
   was None and a new node type was produced, the state becomes Interactive.
   The visible error handling sets state to Error and nodetype to None.
   NOTE(review): the conditions around the error handling and around the
   store to *nodetype are not part of this listing. */
2888 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2890 xmlreader
*This
= impl_from_IXmlReader(iface
);
/* Remember the node type before parsing to detect the first successfully read node. */
2891 XmlNodeType oldtype
= This
->nodetype
;
2895 TRACE("(%p)->(%p)\n", This
, nodetype
);
2900 switch (This
->state
)
2902 case XmlReadState_Closed
:
2905 case XmlReadState_Error
:
2909 hr
= reader_parse_nextnode(This
);
2910 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2911 This
->state
= XmlReadState_Interactive
;
2915 This
->state
= XmlReadState_Error
;
2916 This
->nodetype
= XmlNodeType_None
;
2922 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2923 *nodetype
= This
->nodetype
;
/* IXmlReader::GetNodeType: stores reader_get_nodetype() into *node_type.
   Returns S_FALSE when the reader state is Closed and S_OK otherwise.
   An E_INVALIDARG return is visible; its condition is not part of this
   listing (presumably a NULL node_type - TODO confirm). */
2928 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2930 xmlreader
*This
= impl_from_IXmlReader(iface
);
2932 TRACE("(%p)->(%p)\n", This
, node_type
);
2935 return E_INVALIDARG
;
2937 *node_type
= reader_get_nodetype(This
);
2938 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* Positions the reader on the first attribute of the current node.
   Checks attr_count first (the action taken for zero is not part of this
   listing), increases the depth, selects the head of the attrs list, resets
   chunk_read_off and publishes the attribute prefix, qualified name and
   value as the current string values. */
2941 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2943 if (!reader
->attr_count
)
2947 reader_inc_depth(reader
);
2949 reader
->attr
= LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
);
/* A new current node starts a fresh chunked value read. */
2950 reader
->chunk_read_off
= 0;
2951 reader_set_strvalue(reader
, StringValue_Prefix
, &reader
->attr
->prefix
);
2952 reader_set_strvalue(reader
, StringValue_QualifiedName
, &reader
->attr
->qname
);
2953 reader_set_strvalue(reader
, StringValue_Value
, &reader
->attr
->value
);
2958 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2960 xmlreader
*This
= impl_from_IXmlReader(iface
);
2962 TRACE("(%p)\n", This
);
2964 return reader_move_to_first_attribute(This
);
/* IXmlReader::MoveToNextAttribute: advances to the attribute after the
   current one.  Returns S_FALSE when the node has no attributes.  A call to
   reader_move_to_first_attribute() is visible (its condition is not part of
   this listing - presumably no attribute is selected yet).  Otherwise the
   successor of the current attribute in the attrs list is taken, chunk_read_off
   is reset and the prefix, qualified name and value of the new attribute are
   published.  The result is S_OK when a successor exists, S_FALSE otherwise.
   NOTE(review): the update of This->attr is presumably guarded by "if (next)"
   on a line missing from this listing. */
2967 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2969 xmlreader
*This
= impl_from_IXmlReader(iface
);
2970 const struct list
*next
;
2972 TRACE("(%p)\n", This
);
2974 if (!This
->attr_count
) return S_FALSE
;
2977 return reader_move_to_first_attribute(This
);
2979 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2982 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2983 This
->chunk_read_off
= 0;
2984 reader_set_strvalue(This
, StringValue_Prefix
, &This
->attr
->prefix
);
2985 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->attr
->qname
);
2986 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2989 return next
? S_OK
: S_FALSE
;
/* IXmlReader::MoveToAttributeByName: not implemented.  The only visible
   statement logs a FIXME with the interface pointer, local_name and
   namespaceUri.
   NOTE(review): the local_name parameter line and the return statement are
   not part of this listing. */
2992 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2994 LPCWSTR namespaceUri
)
2996 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
/* IXmlReader::MoveToElement: moves from an attribute back to its element.
   Returns S_FALSE when the node has no attributes.  The visible steps
   decrease the depth, restore the prefix and qualified name string values
   from empty_element when is_empty_element is set and otherwise from the
   head of the elements list, reset chunk_read_off and set the value string
   to strval_empty.
   NOTE(review): statements between the depth change and the name
   restoration, and the return statement, are not part of this listing. */
3000 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
3002 xmlreader
*This
= impl_from_IXmlReader(iface
);
3004 TRACE("(%p)\n", This
);
3006 if (!This
->attr_count
) return S_FALSE
;
3009 reader_dec_depth(This
);
3013 /* FIXME: support other node types with 'attributes' like DTD */
3014 if (This
->is_empty_element
) {
3015 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
3016 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
/* Otherwise the names come from the first entry of the elements list. */
3019 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
3021 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
3022 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
3025 This
->chunk_read_off
= 0;
3026 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
/* IXmlReader::GetQualifiedName: returns the qualified name of the current
   node through *name / *len, selected by node type.
   Element/EndElement: the qname of reader_get_element() when the element has
   a prefix, its localname otherwise.  Attribute: the stored QualifiedName
   string value when the attribute has a prefix, its localname otherwise; a
   default namespace definition has its own branch whose body is not part of
   this listing.  The last visible assignment uses the stored QualifiedName
   string value (presumably the default case).
   NOTE(review): the bodies for Text/CDATA/Comment/Whitespace and the return
   statement are not part of this listing. */
3031 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3033 xmlreader
*This
= impl_from_IXmlReader(iface
);
3034 struct attribute
*attribute
= This
->attr
;
3035 struct element
*element
;
3038 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3043 switch (reader_get_nodetype(This
))
3045 case XmlNodeType_Text
:
3046 case XmlNodeType_CDATA
:
3047 case XmlNodeType_Comment
:
3048 case XmlNodeType_Whitespace
:
3052 case XmlNodeType_Element
:
3053 case XmlNodeType_EndElement
:
3054 element
= reader_get_element(This
);
3055 if (element
->prefix
.len
)
3057 *name
= element
->qname
.str
;
3058 *len
= element
->qname
.len
;
3062 *name
= element
->localname
.str
;
3063 *len
= element
->localname
.len
;
3066 case XmlNodeType_Attribute
:
3067 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3071 } else if (attribute
->prefix
.len
)
3073 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3074 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3078 *name
= attribute
->localname
.str
;
3079 *len
= attribute
->localname
.len
;
3083 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3084 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
/* Looks up a namespace entry by prefix.
   The list searched is reader->ns when prefix is non-NULL and reader->nsdef
   otherwise.  Entries are visited in reverse list order and compared with
   strval_eq() against the requested prefix.
   NOTE(review): the statements returning the match / the not-found result
   are not part of this listing. */
3091 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
3093 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
3096 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
3097 if (strval_eq(reader
, prefix
, &ns
->prefix
))
/* Returns the entry at the head of reader->nsdef.
   The list is tested with list_empty() first; the statement executed for an
   empty list is not part of this listing (presumably "return NULL"). */
3104 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3106 if (list_empty(&reader
->nsdef
))
3109 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
/* IXmlReader::GetNamespaceUri: resolves the namespace URI of the current node.
   Attribute: the reserved cases are tested first - an "xmlns" attribute
   without prefix or an "xmlns" prefix reports the length of xmlns_uriW, an
   "xml" prefix reports the length of xml_uriW; any other prefix is looked up
   with reader_lookup_ns().  Element/EndElement: the prefix is looked up with
   reader_lookup_ns(), and reader_lookup_nsdef() supplies the top default
   namespace (per the existing comment, when the first lookup gives nothing).
   Unknown node types are logged with a FIXME.
   NOTE(review): the assignments to *uri, the handling of lookup results and
   the return statement are not part of this listing. */
3112 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3114 xmlreader
*This
= impl_from_IXmlReader(iface
);
3115 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3116 XmlNodeType nodetype
;
3120 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3128 switch ((nodetype
= reader_get_nodetype(This
)))
3130 case XmlNodeType_Attribute
:
3132 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3133 '2','0','0','0','/','x','m','l','n','s','/',0};
3134 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3135 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3137 /* check for reserved prefixes first */
3138 if ((strval_eq(This
, prefix
, &strval_empty
) && strval_eq(This
, &This
->attr
->localname
, &strval_xmlns
)) ||
3139 strval_eq(This
, prefix
, &strval_xmlns
))
/* Length in characters, excluding the terminating zero of the array. */
3142 *len
= sizeof(xmlns_uriW
)/sizeof(xmlns_uriW
[0]) - 1;
3144 else if (strval_eq(This
, prefix
, &strval_xml
)) {
3146 *len
= sizeof(xml_uriW
)/sizeof(xml_uriW
[0]) - 1;
3150 ns
= reader_lookup_ns(This
, prefix
);
3162 case XmlNodeType_Element
:
3163 case XmlNodeType_EndElement
:
3165 ns
= reader_lookup_ns(This
, prefix
);
3167 /* pick top default ns if any */
3169 ns
= reader_lookup_nsdef(This
);
3181 case XmlNodeType_Text
:
3182 case XmlNodeType_CDATA
:
3183 case XmlNodeType_ProcessingInstruction
:
3184 case XmlNodeType_Comment
:
3185 case XmlNodeType_Whitespace
:
3186 case XmlNodeType_XmlDeclaration
:
3191 FIXME("Unhandled node type %d\n", nodetype
);
/* IXmlReader::GetLocalName: returns the local name of the current node
   through *name / *len, selected by node type.
   Element/EndElement: the localname of reader_get_element().  Attribute: a
   namespace definition reports the prefix of the namespace entry found for
   the attribute localname, a regular attribute reports its own localname; a
   default namespace definition has its own branch whose body is not part of
   this listing.  The last visible assignment uses the stored LocalName
   string value (presumably the default case).
   NOTE(review): the bodies for Text/CDATA/Comment/Whitespace and the return
   statement are not part of this listing. */
3198 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3200 xmlreader
*This
= impl_from_IXmlReader(iface
);
3201 struct element
*element
;
3204 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3209 switch (reader_get_nodetype(This
))
3211 case XmlNodeType_Text
:
3212 case XmlNodeType_CDATA
:
3213 case XmlNodeType_Comment
:
3214 case XmlNodeType_Whitespace
:
3218 case XmlNodeType_Element
:
3219 case XmlNodeType_EndElement
:
3220 element
= reader_get_element(This
);
3221 *name
= element
->localname
.str
;
3222 *len
= element
->localname
.len
;
3224 case XmlNodeType_Attribute
:
3225 if (This
->attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3230 else if (This
->attr
->flags
& ATTRIBUTE_NS_DEFINITION
)
/* NOTE(review): the lookup result is dereferenced without a NULL check -
   confirm that a namespace entry always exists for a definition attribute. */
3232 const struct ns
*ns
= reader_lookup_ns(This
, &This
->attr
->localname
);
3233 *name
= ns
->prefix
.str
;
3234 *len
= ns
->prefix
.len
;
3238 *name
= This
->attr
->localname
.str
;
3239 *len
= This
->attr
->localname
.len
;
3243 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3244 *len
= This
->strvalues
[StringValue_LocalName
].len
;
/* IXmlReader::GetPrefix: returns the prefix of the current node through
   *ret / *len.  For Element, EndElement and Attribute nodes the stored Prefix
   string value is compared against the reserved "xml" and "xmlns" prefixes
   (their bodies are not part of this listing); any other prefix is looked up
   with reader_lookup_ns() and the prefix stored in the matching namespace
   entry is returned.
   NOTE(review): the handling of other node types, of a failed lookup and the
   return statement are not part of this listing. */
3251 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3253 xmlreader
*This
= impl_from_IXmlReader(iface
);
3254 XmlNodeType nodetype
;
3257 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3265 switch ((nodetype
= reader_get_nodetype(This
)))
3267 case XmlNodeType_Element
:
3268 case XmlNodeType_EndElement
:
3269 case XmlNodeType_Attribute
:
3271 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3274 if (strval_eq(This
, prefix
, &strval_xml
))
3279 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3284 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3286 *ret
= ns
->prefix
.str
;
3287 *len
= ns
->prefix
.len
;
/* Returns the value of the current node as a strval.
   XmlDeclaration, EndElement and None nodes yield strval_empty.  For
   namespace definition attributes a namespace entry is looked up by the
   attribute localname, falling back to reader_lookup_nsdef(); regular
   attributes return their own value.  For remaining node types the stored
   Value string is used, and when it has no string pointer yet and
   ensure_allocated is set, a buffer of len + 1 characters is allocated and
   filled from the input (NULL is returned when the allocation fails).
   NOTE(review): what is returned from the namespace entry, the termination
   and storing of the allocated buffer and the final return are not part of
   this listing. */
3299 static const strval
*reader_get_value(xmlreader
*reader
, BOOL ensure_allocated
)
3303 switch (reader_get_nodetype(reader
))
3305 case XmlNodeType_XmlDeclaration
:
3306 case XmlNodeType_EndElement
:
3307 case XmlNodeType_None
:
3308 return &strval_empty
;
3309 case XmlNodeType_Attribute
:
3310 /* For namespace definition attributes return values from namespace list */
3311 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3315 if (!(ns
= reader_lookup_ns(reader
, &reader
->attr
->localname
)))
3316 ns
= reader_lookup_nsdef(reader
);
3320 return &reader
->attr
->value
;
3325 val
= &reader
->strvalues
[StringValue_Value
];
3326 if (!val
->str
&& ensure_allocated
)
/* One extra character is allocated beyond the copied value length. */
3328 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3329 if (!ptr
) return NULL
;
3330 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
/* IXmlReader::GetValue: returns the remaining part of the current node value.
   When the node is a Comment whose value has neither a string nor a length,
   or when the reader is pending, IXmlReader_Read() is called first; a failure
   is returned as is, and E_PENDING is returned if the reader is still pending.
   The value is then fetched with reader_get_value(reader, TRUE) and
   E_OUTOFMEMORY is returned on failure (its condition is not part of this
   listing).  The returned pointer skips the characters already consumed,
   *len (optional) receives the remaining length and chunk_read_off is stored
   negated. */
3338 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3340 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3341 const strval
*val
= &reader
->strvalues
[StringValue_Value
];
3344 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3348 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3353 hr
= IXmlReader_Read(iface
, &type
);
3354 if (FAILED(hr
)) return hr
;
3356 /* return if still pending, partially read values are not reported */
3357 if (is_reader_pending(reader
)) return E_PENDING
;
3360 val
= reader_get_value(reader
, TRUE
);
3362 return E_OUTOFMEMORY
;
/* abs() makes the offset usable whether or not it was already negated by an earlier call. */
3364 off
= abs(reader
->chunk_read_off
);
3365 assert(off
<= val
->len
);
3366 *value
= val
->str
+ off
;
3367 if (len
) *len
= val
->len
- off
;
3368 reader
->chunk_read_off
= -off
;
/* IXmlReader::ReadValueChunk: copies the next part of the current value.
   The value is fetched with reader_get_value(reader, FALSE).  While
   chunk_read_off is not negative, up to chunk_size characters starting at
   chunk_read_off are copied into buffer and the offset is advanced; *read
   (optional) receives the number of characters.  Returns S_OK when characters
   were produced or chunk_size is zero, S_FALSE otherwise.
   NOTE(review): the initial value of len and the condition guarding the copy
   are not part of this listing. */
3372 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3374 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3378 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3380 val
= reader_get_value(reader
, FALSE
);
3382 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3383 if (reader
->chunk_read_off
>= 0)
3385 assert(reader
->chunk_read_off
<= val
->len
);
/* Never copy more than what is left of the value. */
3386 len
= min(val
->len
- reader
->chunk_read_off
, chunk_size
);
3388 if (read
) *read
= len
;
3392 memcpy(buffer
, reader_get_strptr(reader
, val
) + reader
->chunk_read_off
, len
*sizeof(WCHAR
));
3393 reader
->chunk_read_off
+= len
;
3396 return len
|| !chunk_size
? S_OK
: S_FALSE
;
/* IXmlReader::GetBaseUri: not implemented.  The only visible statement logs
   a FIXME with the interface pointer, baseUri and baseUri_length.
   NOTE(review): the baseUri parameter line and the return statement are not
   part of this listing. */
3399 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3401 UINT
*baseUri_length
)
3403 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
/* IXmlReader::IsDefault: not implemented.  The only visible statement logs a
   FIXME with the interface pointer; the return statement is not part of this
   listing. */
3407 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3409 FIXME("(%p): stub\n", iface
);
3413 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3415 xmlreader
*This
= impl_from_IXmlReader(iface
);
3416 TRACE("(%p)\n", This
);
3417 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3418 when current node is start tag of an element */
3419 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
/* IXmlReader::GetLineNumber: stores the line of the current node into
   *line_number.  Element/EndElement use the position of reader_get_element(),
   Attribute uses the position of the current attribute, Whitespace and
   XmlDeclaration use the position kept in empty_element, and the last visible
   assignment (presumably the default case) uses the reader position.
   Returns S_FALSE when the reader state is Closed and S_OK otherwise.
   An E_INVALIDARG return is visible; its condition is not part of this
   listing (presumably a NULL line_number - TODO confirm). */
3422 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3424 xmlreader
*This
= impl_from_IXmlReader(iface
);
3425 const struct element
*element
;
3427 TRACE("(%p %p)\n", This
, line_number
);
3430 return E_INVALIDARG
;
3432 switch (reader_get_nodetype(This
))
3434 case XmlNodeType_Element
:
3435 case XmlNodeType_EndElement
:
3436 element
= reader_get_element(This
);
3437 *line_number
= element
->position
.line_number
;
3439 case XmlNodeType_Attribute
:
3440 *line_number
= This
->attr
->position
.line_number
;
3442 case XmlNodeType_Whitespace
:
3443 case XmlNodeType_XmlDeclaration
:
3444 *line_number
= This
->empty_element
.position
.line_number
;
3447 *line_number
= This
->position
.line_number
;
3451 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* IXmlReader::GetLinePosition: stores the column of the current node into
   *line_position.  Element/EndElement use the position of
   reader_get_element(), Attribute uses the position of the current attribute,
   Whitespace and XmlDeclaration use the position kept in empty_element, and
   the last visible assignment (presumably the default case) uses the reader
   position.  Returns S_FALSE when the reader state is Closed and S_OK
   otherwise.
   An E_INVALIDARG return is visible; its condition is not part of this
   listing (presumably a NULL line_position - TODO confirm). */
3454 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3456 xmlreader
*This
= impl_from_IXmlReader(iface
);
3457 const struct element
*element
;
3459 TRACE("(%p %p)\n", This
, line_position
);
3462 return E_INVALIDARG
;
3464 switch (reader_get_nodetype(This
))
3466 case XmlNodeType_Element
:
3467 case XmlNodeType_EndElement
:
3468 element
= reader_get_element(This
);
3469 *line_position
= element
->position
.line_position
;
3471 case XmlNodeType_Attribute
:
3472 *line_position
= This
->attr
->position
.line_position
;
3474 case XmlNodeType_Whitespace
:
3475 case XmlNodeType_XmlDeclaration
:
3476 *line_position
= This
->empty_element
.position
.line_position
;
3479 *line_position
= This
->position
.line_position
;
3483 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* IXmlReader::GetAttributeCount: stores attr_count of the reader into *count.
   Returns E_INVALIDARG when count is NULL.
   NOTE(review): the final return statement is not part of this listing. */
3486 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3488 xmlreader
*This
= impl_from_IXmlReader(iface
);
3490 TRACE("(%p)->(%p)\n", This
, count
);
3492 if (!count
) return E_INVALIDARG
;
3494 *count
= This
->attr_count
;
3498 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3500 xmlreader
*This
= impl_from_IXmlReader(iface
);
3501 TRACE("(%p)->(%p)\n", This
, depth
);
3502 *depth
= This
->depth
;
3506 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3508 xmlreader
*This
= impl_from_IXmlReader(iface
);
3509 TRACE("(%p)\n", iface
);
3510 return This
->state
== XmlReadState_EndOfFile
;
/* Method table of the IXmlReader implementation; entries are positional.
   NOTE(review): several entries of the original table are not part of this
   listing, so the visible list is not the complete vtable. */
3513 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3515 xmlreader_QueryInterface
,
3519 xmlreader_GetProperty
,
3520 xmlreader_SetProperty
,
3522 xmlreader_GetNodeType
,
3523 xmlreader_MoveToFirstAttribute
,
3524 xmlreader_MoveToNextAttribute
,
3525 xmlreader_MoveToAttributeByName
,
3526 xmlreader_MoveToElement
,
3527 xmlreader_GetQualifiedName
,
3528 xmlreader_GetNamespaceUri
,
3529 xmlreader_GetLocalName
,
3530 xmlreader_GetPrefix
,
3532 xmlreader_ReadValueChunk
,
3533 xmlreader_GetBaseUri
,
3534 xmlreader_IsDefault
,
3535 xmlreader_IsEmptyElement
,
3536 xmlreader_GetLineNumber
,
3537 xmlreader_GetLinePosition
,
3538 xmlreader_GetAttributeCount
,
3543 /** IXmlReaderInput **/
/* IXmlReaderInput QueryInterface.
   riid is compared against IID_IXmlReaderInput and IID_IUnknown.  The visible
   failure path logs a WARN and returns E_NOINTERFACE; the visible success
   path takes a reference through IUnknown_AddRef().
   NOTE(review): the lines storing into *ppvObject are not part of this listing. */
3544 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3546 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3548 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3550 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3551 IsEqualGUID(riid
, &IID_IUnknown
))
3557 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3559 return E_NOINTERFACE
;
3562 IUnknown_AddRef(iface
);
/* IXmlReaderInput AddRef: atomically increments the input reference count
   with InterlockedIncrement() and traces the new value.
   NOTE(review): the return statement is not part of this listing. */
3567 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3569 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3570 ULONG ref
= InterlockedIncrement(&This
->ref
);
3571 TRACE("(%p)->(%d)\n", This
, ref
);
/* IXmlReaderInput Release: atomically decrements the reference count and
   traces it.  The visible teardown sequence releases the wrapped input and
   the stream when set, frees the input buffer when set, frees the base URI
   string and the object itself, and finally releases the allocator.
   NOTE(review): the condition guarding the teardown and the return statement
   are not part of this listing (presumably "ref == 0" and "return ref"). */
3575 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3577 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3578 LONG ref
= InterlockedDecrement(&This
->ref
);
3580 TRACE("(%p)->(%d)\n", This
, ref
);
/* The allocator pointer is copied to a local because the object memory is
   freed before the allocator is released. */
3584 IMalloc
*imalloc
= This
->imalloc
;
3585 if (This
->input
) IUnknown_Release(This
->input
);
3586 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3587 if (This
->buffer
) free_input_buffer(This
->buffer
);
3588 readerinput_free(This
, This
->baseuri
);
3589 readerinput_free(This
, This
);
3590 if (imalloc
) IMalloc_Release(imalloc
);
/* Method table of the built-in IXmlReaderInput implementation.  It only has
   the three IUnknown methods. */
3596 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3598 xmlreaderinput_QueryInterface
,
3599 xmlreaderinput_AddRef
,
3600 xmlreaderinput_Release
/* Creates a reader object and returns the interface requested by riid in *obj.
   The object is allocated either through the supplied IMalloc or with
   heap_alloc() (the selecting condition is not part of this listing),
   E_OUTOFMEMORY is visible as the allocation failure result.  The object is
   zeroed and initialised: state Closed, input state Initial, resume state
   Initial, DTD processing prohibited, node type None, empty attribute /
   namespace / element lists, max_depth 256, chunk_read_off 0 and all string
   values set to strval_empty.  A reference is taken on the allocator when one
   was supplied.
   NOTE(review): the initial reference count assignment and the return
   statement are not part of this listing. */
3603 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3609 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3612 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3614 reader
= heap_alloc(sizeof(*reader
));
3616 return E_OUTOFMEMORY
;
3618 memset(reader
, 0, sizeof(*reader
));
3619 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3621 reader
->state
= XmlReadState_Closed
;
3622 reader
->instate
= XmlReadInState_Initial
;
3623 reader
->resumestate
= XmlReadResumeState_Initial
;
3624 reader
->dtdmode
= DtdProcessing_Prohibit
;
3625 reader
->imalloc
= imalloc
;
3626 if (imalloc
) IMalloc_AddRef(imalloc
);
3627 reader
->nodetype
= XmlNodeType_None
;
3628 list_init(&reader
->attrs
);
3629 list_init(&reader
->nsdef
);
3630 list_init(&reader
->ns
);
3631 list_init(&reader
->elements
);
3632 reader
->max_depth
= 256;
3634 reader
->chunk_read_off
= 0;
3635 for (i
= 0; i
< StringValue_Last
; i
++)
3636 reader
->strvalues
[i
] = strval_empty
;
/* The creation reference is dropped after the query, so the reference taken
   by a successful QueryInterface is the one handed to the caller. */
3638 hr
= IXmlReader_QueryInterface(&reader
->IXmlReader_iface
, riid
, obj
);
3639 IXmlReader_Release(&reader
->IXmlReader_iface
);
3641 TRACE("returning iface %p, hr %#x\n", *obj
, hr
);
/* Creates an IXmlReaderInput object wrapping the supplied stream object.
   Returns E_INVALIDARG when stream or ppInput is NULL and E_OUTOFMEMORY when
   the object cannot be allocated (through the supplied IMalloc or with
   heap_alloc(); the selecting condition is not part of this listing).  The
   object starts with one reference, no stream, the encoding parsed from the
   given name, the hint flag, a copy of the base URI and pending set to 0; a
   reference is taken on the allocator when one was supplied.  After the input
   buffer is allocated the stream is queried for IUnknown and stored as the
   wrapped input, and the new interface is returned through *ppInput.
   NOTE(review): the parameter lines between stream and ppInput, the condition
   guarding the cleanup after alloc_input_buffer() and the return statements
   are not part of this listing. */
3646 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3651 IXmlReaderInput
**ppInput
)
3653 xmlreaderinput
*readerinput
;
3656 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3657 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3659 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3662 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3664 readerinput
= heap_alloc(sizeof(*readerinput
));
3665 if(!readerinput
) return E_OUTOFMEMORY
;
3667 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3668 readerinput
->ref
= 1;
3669 readerinput
->imalloc
= imalloc
;
3670 readerinput
->stream
= NULL
;
3671 if (imalloc
) IMalloc_AddRef(imalloc
);
3672 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3673 readerinput
->hint
= hint
;
3674 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3675 readerinput
->pending
= 0;
3677 hr
= alloc_input_buffer(readerinput
);
/* Cleanup sequence: free the base URI copy and the object, then drop the allocator reference. */
3680 readerinput_free(readerinput
, readerinput
->baseuri
);
3681 readerinput_free(readerinput
, readerinput
);
3682 if (imalloc
) IMalloc_Release(imalloc
);
3685 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3687 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3689 TRACE("returning iface %p\n", *ppInput
);