2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state records where parsing was interrupted by an input problem.
55 The reader uses this information to resume parsing. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW
[] = {'\"',0};
90 static const WCHAR quoteW
[] = {'\'',0};
91 static const WCHAR ltW
[] = {'<',0};
92 static const WCHAR gtW
[] = {'>',0};
93 static const WCHAR commentW
[] = {'<','!','-','-',0};
94 static const WCHAR piW
[] = {'<','?',0};
96 static BOOL
is_namestartchar(WCHAR ch
);
98 static const char *debugstr_nodetype(XmlNodeType nodetype
)
100 static const char * const type_names
[] =
109 "ProcessingInstruction",
122 if (nodetype
> _XmlNodeType_Last
)
123 return wine_dbg_sprintf("unknown type=%d", nodetype
);
125 return type_names
[nodetype
];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
130 static const char * const prop_names
[] =
142 if (prop
> _XmlReaderProperty_Last
)
143 return wine_dbg_sprintf("unknown property=%d", prop
);
145 return prop_names
[prop
];
148 struct xml_encoding_data
155 static const struct xml_encoding_data xml_encoding_map
[] = {
156 { utf16W
, XmlEncoding_UTF16
, ~0 },
157 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
160 const WCHAR
*get_encoding_name(xml_encoding encoding
)
162 return xml_encoding_map
[encoding
].name
;
165 xml_encoding
get_encoding_from_codepage(UINT codepage
)
168 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
170 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
172 return XmlEncoding_Unknown
;
179 unsigned int allocated
;
180 unsigned int written
;
183 typedef struct input_buffer input_buffer
;
187 IXmlReaderInput IXmlReaderInput_iface
;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
192 xml_encoding encoding
;
195 /* stream reference set after SetInput() call from reader,
196 stored as a sequential stream, because the optimizations
197 possible with IStream aren't implemented yet */
198 ISequentialStream
*stream
;
199 input_buffer
*buffer
;
200 unsigned int pending
: 1;
203 static const struct IUnknownVtbl xmlreaderinputvtbl
;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
215 WCHAR
*str
; /* allocated null-terminated string */
216 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start
; /* input position where value starts */
220 static WCHAR emptyW
[] = {0};
221 static WCHAR xmlW
[] = {'x','m','l',0};
222 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
223 static const strval strval_empty
= { emptyW
};
224 static const strval strval_xml
= { xmlW
, 3 };
225 static const strval strval_xmlns
= { xmlnsW
, 5 };
227 struct reader_position
235 ATTRIBUTE_NS_DEFINITION
= 0x1,
236 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
246 struct reader_position position
;
256 struct reader_position position
;
264 struct element
*element
;
269 IXmlReader IXmlReader_iface
;
271 xmlreaderinput
*input
;
274 HRESULT error
; /* error set on XmlReadState_Error */
275 XmlReaderInternalState instate
;
276 XmlReaderResumeState resumestate
;
277 XmlNodeType nodetype
;
278 DtdProcessing dtdmode
;
279 IXmlResolver
*resolver
;
281 struct reader_position position
;
282 struct list attrs
; /* attributes list for current node */
283 struct attribute
*attr
; /* current attribute */
287 struct list elements
;
288 strval strvalues
[StringValue_Last
];
291 BOOL is_empty_element
;
292 struct element empty_element
; /* used for empty elements without end tag <a />,
293 and to keep <?xml reader position */
294 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
299 encoded_buffer utf16
;
300 encoded_buffer encoded
;
302 xmlreaderinput
*input
;
305 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
307 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
310 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
312 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
315 /* reader memory allocation functions */
316 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
318 return m_alloc(reader
->imalloc
, len
);
321 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
323 void *ret
= reader_alloc(reader
, len
);
329 static inline void reader_free(xmlreader
*reader
, void *mem
)
331 m_free(reader
->imalloc
, mem
);
334 /* Just return pointer from offset, no attempt to read more. */
335 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
337 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
338 return (WCHAR
*)buffer
->data
+ offset
;
341 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
343 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
346 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
350 if (src
->str
!= strval_empty
.str
)
352 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
353 if (!dest
->str
) return E_OUTOFMEMORY
;
354 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
355 dest
->str
[dest
->len
] = 0;
362 /* reader input memory allocation functions */
363 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
365 return m_alloc(input
->imalloc
, len
);
368 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
370 return m_realloc(input
->imalloc
, mem
, len
);
373 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
375 m_free(input
->imalloc
, mem
);
378 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
385 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
386 ret
= readerinput_alloc(input
, size
);
387 if (ret
) memcpy(ret
, str
, size
);
393 static void reader_clear_attrs(xmlreader
*reader
)
395 struct attribute
*attr
, *attr2
;
396 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
398 reader_free(reader
, attr
);
400 list_init(&reader
->attrs
);
401 reader
->attr_count
= 0;
405 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
406 while we are on a node with attributes */
407 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
408 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
410 struct attribute
*attr
;
412 attr
= reader_alloc(reader
, sizeof(*attr
));
413 if (!attr
) return E_OUTOFMEMORY
;
416 attr
->prefix
= *prefix
;
418 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
419 attr
->localname
= *localname
;
420 attr
->qname
= qname
? *qname
: *localname
;
421 attr
->value
= *value
;
422 attr
->position
= *position
;
424 list_add_tail(&reader
->attrs
, &attr
->entry
);
425 reader
->attr_count
++;
430 /* Returns current element, doesn't check if reader is actually positioned on it. */
431 static struct element
*reader_get_element(xmlreader
*reader
)
433 if (reader
->is_empty_element
)
434 return &reader
->empty_element
;
436 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
439 /* This one frees stored string value if needed */
440 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
442 if (v
->str
!= strval_empty
.str
)
444 reader_free(reader
, v
->str
);
449 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
456 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
458 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
461 /* used to initialize from constant string */
462 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
469 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
471 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
474 static void reader_free_strvalues(xmlreader
*reader
)
477 for (type
= 0; type
< StringValue_Last
; type
++)
478 reader_free_strvalue(reader
, type
);
481 /* This helper should only be used to test if strings are the same,
482 it doesn't try to sort. */
483 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
485 if (str1
->len
!= str2
->len
) return 0;
486 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
489 static void reader_clear_elements(xmlreader
*reader
)
491 struct element
*elem
, *elem2
;
492 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
494 reader_free_strvalued(reader
, &elem
->prefix
);
495 reader_free_strvalued(reader
, &elem
->localname
);
496 reader_free_strvalued(reader
, &elem
->qname
);
497 reader_free(reader
, elem
);
499 list_init(&reader
->elements
);
500 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
501 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
502 reader
->is_empty_element
= FALSE
;
505 static HRESULT
reader_inc_depth(xmlreader
*reader
)
507 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
510 static void reader_dec_depth(xmlreader
*reader
)
516 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
521 ns
= reader_alloc(reader
, sizeof(*ns
));
522 if (!ns
) return E_OUTOFMEMORY
;
525 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
527 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
529 reader_free(reader
, ns
);
534 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
536 reader_free_strvalued(reader
, &ns
->prefix
);
537 reader_free(reader
, ns
);
542 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
546 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
548 reader_free_strvalued(reader
, &element
->prefix
);
549 reader_free_strvalued(reader
, &element
->localname
);
550 reader_free_strvalued(reader
, &element
->qname
);
551 reader_free(reader
, element
);
554 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
558 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
561 ns
->element
= element
;
564 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
567 ns
->element
= element
;
571 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
572 strval
*qname
, const struct reader_position
*position
)
574 struct element
*element
;
577 element
= reader_alloc_zero(reader
, sizeof(*element
));
579 return E_OUTOFMEMORY
;
581 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
582 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
583 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
585 list_add_head(&reader
->elements
, &element
->entry
);
586 reader_mark_ns_nodes(reader
, element
);
587 reader
->is_empty_element
= FALSE
;
588 element
->position
= *position
;
591 reader_free_element(reader
, element
);
596 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
600 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
601 if (ns
->element
!= element
)
604 list_remove(&ns
->entry
);
605 reader_free_strvalued(reader
, &ns
->prefix
);
606 reader_free_strvalued(reader
, &ns
->uri
);
607 reader_free(reader
, ns
);
610 if (!list_empty(&reader
->nsdef
)) {
611 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
612 if (ns
->element
== element
) {
613 list_remove(&ns
->entry
);
614 reader_free_strvalued(reader
, &ns
->prefix
);
615 reader_free_strvalued(reader
, &ns
->uri
);
616 reader_free(reader
, ns
);
621 static void reader_pop_element(xmlreader
*reader
)
623 struct element
*element
;
625 if (list_empty(&reader
->elements
))
628 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
629 list_remove(&element
->entry
);
631 reader_pop_ns_nodes(reader
, element
);
632 reader_free_element(reader
, element
);
634 /* It was a root element, the rest is expected as Misc */
635 if (list_empty(&reader
->elements
))
636 reader
->instate
= XmlReadInState_MiscEnd
;
639 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
640 means node value is to be determined. */
641 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
643 strval
*v
= &reader
->strvalues
[type
];
645 reader_free_strvalue(reader
, type
);
654 if (value
->str
== strval_empty
.str
)
658 if (type
== StringValue_Value
)
660 /* defer allocation for value string */
662 v
->start
= value
->start
;
667 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
668 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
669 v
->str
[value
->len
] = 0;
675 static inline int is_reader_pending(xmlreader
*reader
)
677 return reader
->input
->pending
;
680 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
682 const int initial_len
= 0x2000;
683 buffer
->data
= readerinput_alloc(input
, initial_len
);
684 if (!buffer
->data
) return E_OUTOFMEMORY
;
686 memset(buffer
->data
, 0, 4);
688 buffer
->allocated
= initial_len
;
694 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
696 readerinput_free(input
, buffer
->data
);
699 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
701 if (encoding
== XmlEncoding_Unknown
)
703 FIXME("unsupported encoding %d\n", encoding
);
707 *cp
= xml_encoding_map
[encoding
].cp
;
712 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
716 if (!name
) return XmlEncoding_Unknown
;
719 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
726 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
728 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
730 return xml_encoding_map
[n
].enc
;
738 return XmlEncoding_Unknown
;
741 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
743 input_buffer
*buffer
;
746 input
->buffer
= NULL
;
748 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
749 if (!buffer
) return E_OUTOFMEMORY
;
751 buffer
->input
= input
;
752 buffer
->code_page
= ~0; /* code page is unknown at this point */
753 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
755 readerinput_free(input
, buffer
);
759 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
761 free_encoded_buffer(input
, &buffer
->utf16
);
762 readerinput_free(input
, buffer
);
766 input
->buffer
= buffer
;
770 static void free_input_buffer(input_buffer
*buffer
)
772 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
773 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
774 readerinput_free(buffer
->input
, buffer
);
777 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
779 if (readerinput
->stream
) {
780 ISequentialStream_Release(readerinput
->stream
);
781 readerinput
->stream
= NULL
;
785 /* Queries already stored interface for IStream/ISequentialStream.
786 Interface supplied on creation will be overwritten */
787 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
791 readerinput_release_stream(readerinput
);
792 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
794 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
799 /* reads a chunk to raw buffer */
800 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
802 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
803 /* to make sure aligned length won't exceed allocated length */
804 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
808 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
809 variable width encodings like UTF-8 */
810 len
= (len
+ 3) & ~3;
811 /* try to use allocated space or grow */
812 if (buffer
->allocated
- buffer
->written
< len
)
814 buffer
->allocated
*= 2;
815 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
816 len
= buffer
->allocated
- buffer
->written
;
820 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
821 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
822 readerinput
->pending
= hr
== E_PENDING
;
823 if (FAILED(hr
)) return hr
;
824 buffer
->written
+= read
;
829 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
830 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
832 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
834 length
*= sizeof(WCHAR
);
835 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
836 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
838 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
839 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
840 buffer
->allocated
= grown_size
;
844 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
846 static const char startA
[] = {'<','?'};
847 static const char commentA
[] = {'<','!'};
848 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
849 unsigned char *ptr
= (unsigned char*)buffer
->data
;
851 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
852 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
853 /* test start byte */
856 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
857 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
858 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
859 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
863 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
865 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
866 static const char utf8bom
[] = {0xef,0xbb,0xbf};
867 static const char utf16lebom
[] = {0xff,0xfe};
870 *enc
= XmlEncoding_Unknown
;
872 if (buffer
->written
<= 3)
874 HRESULT hr
= readerinput_growraw(readerinput
);
875 if (FAILED(hr
)) return hr
;
876 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
879 ptrW
= (WCHAR
*)buffer
->data
;
880 /* try start symbols if we have enough data to do that, input buffer should contain
881 first chunk already */
882 if (readerinput_is_utf8(readerinput
))
883 *enc
= XmlEncoding_UTF8
;
884 else if (*ptrW
== '<')
887 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
888 *enc
= XmlEncoding_UTF16
;
890 /* try with BOM now */
891 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
893 buffer
->cur
+= sizeof(utf8bom
);
894 *enc
= XmlEncoding_UTF8
;
896 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
898 buffer
->cur
+= sizeof(utf16lebom
);
899 *enc
= XmlEncoding_UTF16
;
905 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
907 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
908 int len
= buffer
->written
;
910 /* complete single byte char */
911 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
913 /* find start byte of multibyte char */
914 while (--len
&& !(buffer
->data
[len
] & 0xc0))
920 /* Returns byte length of complete char sequence for buffer code page,
921 it's relative to current buffer position which is currently used for BOM handling
923 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
925 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
928 if (readerinput
->buffer
->code_page
== CP_UTF8
)
929 len
= readerinput_get_utf8_convlen(readerinput
);
931 len
= buffer
->written
;
933 TRACE("%d\n", len
- buffer
->cur
);
934 return len
- buffer
->cur
;
937 /* It's possible that raw buffer has some leftovers from last conversion - some char
938 sequence that doesn't represent a full code point. Length argument should be calculated with
939 readerinput_get_convlen(), if it's -1 it will be calculated here. */
940 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
942 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
945 len
= readerinput_get_convlen(readerinput
);
947 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
948 /* everything below cur is lost too */
949 buffer
->written
-= len
+ buffer
->cur
;
950 /* after this point we don't need cur offset really,
951 it's used only to mark where actual data begins when first chunk is read */
955 /* note that raw buffer content is kept */
956 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
958 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
959 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
965 hr
= get_code_page(enc
, &cp
);
966 if (FAILED(hr
)) return;
968 readerinput
->buffer
->code_page
= cp
;
969 len
= readerinput_get_convlen(readerinput
);
971 TRACE("switching to cp %d\n", cp
);
973 /* just copy in this case */
974 if (enc
== XmlEncoding_UTF16
)
976 readerinput_grow(readerinput
, len
);
977 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
978 dest
->written
+= len
*sizeof(WCHAR
);
982 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
983 readerinput_grow(readerinput
, dest_len
);
984 ptr
= (WCHAR
*)dest
->data
;
985 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
987 dest
->written
+= dest_len
*sizeof(WCHAR
);
990 /* shrinks parsed data a buffer begins with */
991 static void reader_shrink(xmlreader
*reader
)
993 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
995 /* avoid to move too often using threshold shrink length */
996 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
998 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
999 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1001 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1005 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1006 It won't attempt to shrink but will grow destination buffer if needed */
1007 static HRESULT
reader_more(xmlreader
*reader
)
1009 xmlreaderinput
*readerinput
= reader
->input
;
1010 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1011 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1012 UINT cp
= readerinput
->buffer
->code_page
;
1017 /* get some raw data from stream first */
1018 hr
= readerinput_growraw(readerinput
);
1019 len
= readerinput_get_convlen(readerinput
);
1021 /* just copy for UTF-16 case */
1024 readerinput_grow(readerinput
, len
);
1025 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1026 dest
->written
+= len
*sizeof(WCHAR
);
1030 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1031 readerinput_grow(readerinput
, dest_len
);
1032 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1033 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1035 dest
->written
+= dest_len
*sizeof(WCHAR
);
1036 /* get rid of processed data */
1037 readerinput_shrinkraw(readerinput
, len
);
1042 static inline UINT
reader_get_cur(xmlreader
*reader
)
1044 return reader
->input
->buffer
->utf16
.cur
;
1047 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1049 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1050 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1051 if (!*ptr
) reader_more(reader
);
1052 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1055 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1058 const WCHAR
*ptr
= reader_get_ptr(reader
);
1063 reader_more(reader
);
1064 ptr
= reader_get_ptr(reader
);
1066 if (str
[i
] != ptr
[i
])
1067 return ptr
[i
] - str
[i
];
1073 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1076 reader
->position
.line_position
= 1;
1077 else if (ch
== '\n')
1079 reader
->position
.line_number
++;
1080 reader
->position
.line_position
= 1;
1083 reader
->position
.line_position
++;
1086 /* moves cursor n WCHARs forward */
1087 static void reader_skipn(xmlreader
*reader
, int n
)
1089 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1092 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1094 reader_update_position(reader
, *ptr
);
1099 static inline BOOL
is_wchar_space(WCHAR ch
)
1101 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1104 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1105 static int reader_skipspaces(xmlreader
*reader
)
1107 const WCHAR
*ptr
= reader_get_ptr(reader
);
1108 UINT start
= reader_get_cur(reader
);
1110 while (is_wchar_space(*ptr
))
1112 reader_skipn(reader
, 1);
1113 ptr
= reader_get_ptr(reader
);
1116 return reader_get_cur(reader
) - start
;
1119 /* [26] VersionNum ::= '1.' [0-9]+ */
1120 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1122 static const WCHAR onedotW
[] = {'1','.',0};
1126 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1128 start
= reader_get_cur(reader
);
1130 reader_skipn(reader
, 2);
1132 ptr2
= ptr
= reader_get_ptr(reader
);
1133 while (*ptr
>= '0' && *ptr
<= '9')
1135 reader_skipn(reader
, 1);
1136 ptr
= reader_get_ptr(reader
);
1139 if (ptr2
== ptr
) return WC_E_DIGIT
;
1140 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1141 TRACE("version=%s\n", debug_strval(reader
, val
));
1145 /* [25] Eq ::= S? '=' S? */
1146 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1148 static const WCHAR eqW
[] = {'=',0};
1149 reader_skipspaces(reader
);
1150 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1152 reader_skipn(reader
, 1);
1153 reader_skipspaces(reader
);
1157 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1158 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1160 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1161 struct reader_position position
;
1165 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1167 position
= reader
->position
;
1168 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1169 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1170 /* skip 'version' */
1171 reader_skipn(reader
, 7);
1173 hr
= reader_parse_eq(reader
);
1174 if (FAILED(hr
)) return hr
;
1176 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1179 reader_skipn(reader
, 1);
1181 hr
= reader_parse_versionnum(reader
, &val
);
1182 if (FAILED(hr
)) return hr
;
1184 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1188 reader_skipn(reader
, 1);
1190 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1193 /* ([A-Za-z0-9._] | '-') */
1194 static inline BOOL
is_wchar_encname(WCHAR ch
)
1196 return ((ch
>= 'A' && ch
<= 'Z') ||
1197 (ch
>= 'a' && ch
<= 'z') ||
1198 (ch
>= '0' && ch
<= '9') ||
1199 (ch
== '.') || (ch
== '_') ||
1203 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1204 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1206 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1210 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1211 return WC_E_ENCNAME
;
1213 val
->start
= reader_get_cur(reader
);
1216 while (is_wchar_encname(*++ptr
))
1220 enc
= parse_encoding_name(start
, len
);
1221 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1225 if (enc
== XmlEncoding_Unknown
)
1226 return WC_E_ENCNAME
;
1228 /* skip encoding name */
1229 reader_skipn(reader
, len
);
1233 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1234 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1236 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1237 struct reader_position position
;
1241 if (!reader_skipspaces(reader
)) return S_FALSE
;
1243 position
= reader
->position
;
1244 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1245 name
.str
= reader_get_ptr(reader
);
1246 name
.start
= reader_get_cur(reader
);
1248 /* skip 'encoding' */
1249 reader_skipn(reader
, 8);
1251 hr
= reader_parse_eq(reader
);
1252 if (FAILED(hr
)) return hr
;
1254 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1257 reader_skipn(reader
, 1);
1259 hr
= reader_parse_encname(reader
, &val
);
1260 if (FAILED(hr
)) return hr
;
1262 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1266 reader_skipn(reader
, 1);
1268 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1271 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1272 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1274 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1275 static const WCHAR yesW
[] = {'y','e','s',0};
1276 static const WCHAR noW
[] = {'n','o',0};
1277 struct reader_position position
;
1282 if (!reader_skipspaces(reader
)) return S_FALSE
;
1284 position
= reader
->position
;
1285 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1286 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1287 /* skip 'standalone' */
1288 reader_skipn(reader
, 10);
1290 hr
= reader_parse_eq(reader
);
1291 if (FAILED(hr
)) return hr
;
1293 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1296 reader_skipn(reader
, 1);
1298 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1299 return WC_E_XMLDECL
;
1301 start
= reader_get_cur(reader
);
1302 /* skip 'yes'|'no' */
1303 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1304 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1305 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1307 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1310 reader_skipn(reader
, 1);
1312 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1315 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1316 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1318 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1319 static const WCHAR declcloseW
[] = {'?','>',0};
1320 struct reader_position position
;
1323 /* check if we have "<?xml " */
1324 if (reader_cmp(reader
, xmldeclW
))
1327 reader_skipn(reader
, 2);
1328 position
= reader
->position
;
1329 reader_skipn(reader
, 3);
1330 hr
= reader_parse_versioninfo(reader
);
1334 hr
= reader_parse_encdecl(reader
);
1338 hr
= reader_parse_sddecl(reader
);
1342 reader_skipspaces(reader
);
1343 if (reader_cmp(reader
, declcloseW
))
1344 return WC_E_XMLDECL
;
1347 reader_skipn(reader
, 2);
1349 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1350 reader
->empty_element
.position
= position
;
1351 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1352 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1357 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1358 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1363 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1365 start
= reader
->resume
[XmlReadResume_Body
];
1366 ptr
= reader_get_ptr(reader
);
1371 reader_skipn(reader
, 4);
1372 reader_shrink(reader
);
1373 ptr
= reader_get_ptr(reader
);
1374 start
= reader_get_cur(reader
);
1375 reader
->nodetype
= XmlNodeType_Comment
;
1376 reader
->resume
[XmlReadResume_Body
] = start
;
1377 reader
->resumestate
= XmlReadResumeState_Comment
;
1378 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1381 /* will exit when there's no more data, it won't attempt to
1382 read more from stream */
1393 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1394 TRACE("%s\n", debug_strval(reader
, &value
));
1396 /* skip rest of markup '->' */
1397 reader_skipn(reader
, 3);
1399 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1400 reader
->resume
[XmlReadResume_Body
] = 0;
1401 reader
->resumestate
= XmlReadResumeState_Initial
;
1405 return WC_E_COMMENT
;
1409 reader_skipn(reader
, 1);
1416 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1417 static inline BOOL
is_char(WCHAR ch
)
1419 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1420 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1421 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1422 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1423 (ch
>= 0xe000 && ch
<= 0xfffd);
1426 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1427 static inline BOOL
is_pubchar(WCHAR ch
)
1429 return (ch
== ' ') ||
1430 (ch
>= 'a' && ch
<= 'z') ||
1431 (ch
>= 'A' && ch
<= 'Z') ||
1432 (ch
>= '0' && ch
<= '9') ||
1433 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1434 (ch
== '=') || (ch
== '?') ||
1435 (ch
== '@') || (ch
== '!') ||
1436 (ch
>= '#' && ch
<= '%') || /* #$% */
1437 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1440 static inline BOOL
is_namestartchar(WCHAR ch
)
1442 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1443 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1444 (ch
>= 0xc0 && ch
<= 0xd6) ||
1445 (ch
>= 0xd8 && ch
<= 0xf6) ||
1446 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1447 (ch
>= 0x370 && ch
<= 0x37d) ||
1448 (ch
>= 0x37f && ch
<= 0x1fff) ||
1449 (ch
>= 0x200c && ch
<= 0x200d) ||
1450 (ch
>= 0x2070 && ch
<= 0x218f) ||
1451 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1452 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1453 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1454 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1455 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1456 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1459 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1460 static inline BOOL
is_ncnamechar(WCHAR ch
)
1462 return (ch
>= 'A' && ch
<= 'Z') ||
1463 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1464 (ch
== '-') || (ch
== '.') ||
1465 (ch
>= '0' && ch
<= '9') ||
1467 (ch
>= 0xc0 && ch
<= 0xd6) ||
1468 (ch
>= 0xd8 && ch
<= 0xf6) ||
1469 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1470 (ch
>= 0x300 && ch
<= 0x36f) ||
1471 (ch
>= 0x370 && ch
<= 0x37d) ||
1472 (ch
>= 0x37f && ch
<= 0x1fff) ||
1473 (ch
>= 0x200c && ch
<= 0x200d) ||
1474 (ch
>= 0x203f && ch
<= 0x2040) ||
1475 (ch
>= 0x2070 && ch
<= 0x218f) ||
1476 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1477 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1478 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1479 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1480 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1481 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1484 static inline BOOL
is_namechar(WCHAR ch
)
1486 return (ch
== ':') || is_ncnamechar(ch
);
1489 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1491 /* When we're on attribute always return attribute type, container node type is kept.
1492 Note that container is not necessarily an element, and attribute doesn't mean it's
1493 an attribute in XML spec terms. */
1494 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1497 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1498 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1499 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1500 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1501 [5] Name ::= NameStartChar (NameChar)* */
1502 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1507 if (reader
->resume
[XmlReadResume_Name
])
1509 start
= reader
->resume
[XmlReadResume_Name
];
1510 ptr
= reader_get_ptr(reader
);
1514 ptr
= reader_get_ptr(reader
);
1515 start
= reader_get_cur(reader
);
1516 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1519 while (is_namechar(*ptr
))
1521 reader_skipn(reader
, 1);
1522 ptr
= reader_get_ptr(reader
);
1525 if (is_reader_pending(reader
))
1527 reader
->resume
[XmlReadResume_Name
] = start
;
1531 reader
->resume
[XmlReadResume_Name
] = 0;
1533 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1534 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1539 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1540 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1542 static const WCHAR xmlW
[] = {'x','m','l'};
1543 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1549 hr
= reader_parse_name(reader
, &name
);
1550 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1552 /* now that we got name check for illegal content */
1553 if (strval_eq(reader
, &name
, &xmlval
))
1554 return WC_E_LEADINGXML
;
1556 /* PITarget can't be a qualified name */
1557 ptr
= reader_get_strptr(reader
, &name
);
1558 for (i
= 0; i
< name
.len
; i
++)
1560 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1562 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1567 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1568 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1575 switch (reader
->resumestate
)
1577 case XmlReadResumeState_Initial
:
1579 reader_skipn(reader
, 2);
1580 reader_shrink(reader
);
1581 reader
->resumestate
= XmlReadResumeState_PITarget
;
1582 case XmlReadResumeState_PITarget
:
1583 hr
= reader_parse_pitarget(reader
, &target
);
1584 if (FAILED(hr
)) return hr
;
1585 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1586 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1587 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1588 reader
->resumestate
= XmlReadResumeState_PIBody
;
1589 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1594 start
= reader
->resume
[XmlReadResume_Body
];
1595 ptr
= reader_get_ptr(reader
);
1602 UINT cur
= reader_get_cur(reader
);
1605 /* strip all leading whitespace chars */
1608 ptr
= reader_get_ptr2(reader
, start
);
1609 if (!is_wchar_space(*ptr
)) break;
1613 reader_init_strvalue(start
, cur
-start
, &value
);
1616 reader_skipn(reader
, 2);
1617 TRACE("%s\n", debug_strval(reader
, &value
));
1618 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1619 reader
->resumestate
= XmlReadResumeState_Initial
;
1620 reader
->resume
[XmlReadResume_Body
] = 0;
1621 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1626 reader_skipn(reader
, 1);
1627 ptr
= reader_get_ptr(reader
);
1633 /* This one is used to parse significant whitespace nodes, like in Misc production */
1634 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1636 switch (reader
->resumestate
)
1638 case XmlReadResumeState_Initial
:
1639 reader_shrink(reader
);
1640 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1641 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1642 reader
->nodetype
= XmlNodeType_Whitespace
;
1643 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1644 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1645 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1647 case XmlReadResumeState_Whitespace
:
1652 reader_skipspaces(reader
);
1653 if (is_reader_pending(reader
)) return S_OK
;
1655 start
= reader
->resume
[XmlReadResume_Body
];
1656 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1657 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1658 TRACE("%s\n", debug_strval(reader
, &value
));
1659 reader
->resumestate
= XmlReadResumeState_Initial
;
1668 /* [27] Misc ::= Comment | PI | S */
1669 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1671 HRESULT hr
= S_FALSE
;
1673 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1675 hr
= reader_more(reader
);
1676 if (FAILED(hr
)) return hr
;
1678 /* finish current node */
1679 switch (reader
->resumestate
)
1681 case XmlReadResumeState_PITarget
:
1682 case XmlReadResumeState_PIBody
:
1683 return reader_parse_pi(reader
);
1684 case XmlReadResumeState_Comment
:
1685 return reader_parse_comment(reader
);
1686 case XmlReadResumeState_Whitespace
:
1687 return reader_parse_whitespace(reader
);
1689 ERR("unknown resume state %d\n", reader
->resumestate
);
1695 const WCHAR
*cur
= reader_get_ptr(reader
);
1697 if (is_wchar_space(*cur
))
1698 hr
= reader_parse_whitespace(reader
);
1699 else if (!reader_cmp(reader
, commentW
))
1700 hr
= reader_parse_comment(reader
);
1701 else if (!reader_cmp(reader
, piW
))
1702 hr
= reader_parse_pi(reader
);
1706 if (hr
!= S_FALSE
) return hr
;
1712 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1713 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1715 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1718 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1721 reader_skipn(reader
, 1);
1723 cur
= reader_get_ptr(reader
);
1724 start
= reader_get_cur(reader
);
1725 while (is_char(*cur
) && *cur
!= quote
)
1727 reader_skipn(reader
, 1);
1728 cur
= reader_get_ptr(reader
);
1730 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1731 if (*cur
== quote
) reader_skipn(reader
, 1);
1733 TRACE("%s\n", debug_strval(reader
, literal
));
1737 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1738 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1739 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1741 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1744 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1747 reader_skipn(reader
, 1);
1749 start
= reader_get_cur(reader
);
1750 cur
= reader_get_ptr(reader
);
1751 while (is_pubchar(*cur
) && *cur
!= quote
)
1753 reader_skipn(reader
, 1);
1754 cur
= reader_get_ptr(reader
);
1756 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1757 if (*cur
== quote
) reader_skipn(reader
, 1);
1759 TRACE("%s\n", debug_strval(reader
, literal
));
1763 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1764 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1766 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1767 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1768 struct reader_position position
= reader
->position
;
1773 if (!reader_cmp(reader
, publicW
)) {
1777 reader_skipn(reader
, 6);
1778 cnt
= reader_skipspaces(reader
);
1779 if (!cnt
) return WC_E_WHITESPACE
;
1781 hr
= reader_parse_pub_literal(reader
, &pub
);
1782 if (FAILED(hr
)) return hr
;
1784 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1785 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1786 if (FAILED(hr
)) return hr
;
1788 cnt
= reader_skipspaces(reader
);
1789 if (!cnt
) return S_OK
;
1791 /* optional system id */
1792 hr
= reader_parse_sys_literal(reader
, &sys
);
1793 if (FAILED(hr
)) return S_OK
;
1795 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1796 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1797 if (FAILED(hr
)) return hr
;
1800 } else if (!reader_cmp(reader
, systemW
)) {
1802 reader_skipn(reader
, 6);
1803 cnt
= reader_skipspaces(reader
);
1804 if (!cnt
) return WC_E_WHITESPACE
;
1806 hr
= reader_parse_sys_literal(reader
, &sys
);
1807 if (FAILED(hr
)) return hr
;
1809 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1810 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1816 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1817 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1819 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1824 /* check if we have "<!DOCTYPE" */
1825 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1826 reader_shrink(reader
);
1828 /* DTD processing is not allowed by default */
1829 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1831 reader_skipn(reader
, 9);
1832 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1835 hr
= reader_parse_name(reader
, &name
);
1836 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1838 reader_skipspaces(reader
);
1840 hr
= reader_parse_externalid(reader
);
1841 if (FAILED(hr
)) return hr
;
1843 reader_skipspaces(reader
);
1845 cur
= reader_get_ptr(reader
);
1848 FIXME("internal subset parsing not implemented\n");
1853 reader_skipn(reader
, 1);
1855 reader
->nodetype
= XmlNodeType_DocumentType
;
1856 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1857 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1862 /* [11 NS] LocalPart ::= NCName */
1863 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1868 if (reader
->resume
[XmlReadResume_Local
])
1870 start
= reader
->resume
[XmlReadResume_Local
];
1871 ptr
= reader_get_ptr(reader
);
1875 ptr
= reader_get_ptr(reader
);
1876 start
= reader_get_cur(reader
);
1879 while (is_ncnamechar(*ptr
))
1881 reader_skipn(reader
, 1);
1882 ptr
= reader_get_ptr(reader
);
1885 if (check_for_separator
&& *ptr
== ':')
1886 return NC_E_QNAMECOLON
;
1888 if (is_reader_pending(reader
))
1890 reader
->resume
[XmlReadResume_Local
] = start
;
1894 reader
->resume
[XmlReadResume_Local
] = 0;
1896 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1901 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1902 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1903 [9 NS] UnprefixedName ::= LocalPart
1904 [10 NS] Prefix ::= NCName */
1905 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1911 if (reader
->resume
[XmlReadResume_Name
])
1913 start
= reader
->resume
[XmlReadResume_Name
];
1914 ptr
= reader_get_ptr(reader
);
1918 ptr
= reader_get_ptr(reader
);
1919 start
= reader_get_cur(reader
);
1920 reader
->resume
[XmlReadResume_Name
] = start
;
1921 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1924 if (reader
->resume
[XmlReadResume_Local
])
1926 hr
= reader_parse_local(reader
, local
, FALSE
);
1927 if (FAILED(hr
)) return hr
;
1929 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1930 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1935 /* skip prefix part */
1936 while (is_ncnamechar(*ptr
))
1938 reader_skipn(reader
, 1);
1939 ptr
= reader_get_ptr(reader
);
1942 if (is_reader_pending(reader
)) return E_PENDING
;
1944 /* got a qualified name */
1947 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1950 reader_skipn(reader
, 1);
1951 hr
= reader_parse_local(reader
, local
, TRUE
);
1952 if (FAILED(hr
)) return hr
;
1956 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1957 reader_init_strvalue(0, 0, prefix
);
1962 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1964 TRACE("ncname %s\n", debug_strval(reader
, local
));
1966 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1968 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1971 reader
->resume
[XmlReadResume_Name
] = 0;
1972 reader
->resume
[XmlReadResume_Local
] = 0;
1977 /* Applies normalization rules to a single char, used for attribute values.
1979 Rules include 2 steps:
1981 1) replacing \r\n with a single \n;
1982 2) replacing all whitespace chars with ' '.
1985 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1987 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1989 if (!is_wchar_space(*ptr
)) return;
1991 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1993 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1994 memmove(ptr
+1, ptr
+2, len
);
1999 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2001 static const WCHAR entltW
[] = {'l','t'};
2002 static const WCHAR entgtW
[] = {'g','t'};
2003 static const WCHAR entampW
[] = {'a','m','p'};
2004 static const WCHAR entaposW
[] = {'a','p','o','s'};
2005 static const WCHAR entquotW
[] = {'q','u','o','t'};
2006 static const strval lt
= { (WCHAR
*)entltW
, 2 };
2007 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
2008 static const strval amp
= { (WCHAR
*)entampW
, 3 };
2009 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
2010 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
2011 WCHAR
*str
= reader_get_strptr(reader
, name
);
2016 if (strval_eq(reader
, name
, <
)) return '<';
2019 if (strval_eq(reader
, name
, >
)) return '>';
2022 if (strval_eq(reader
, name
, &
))
2024 else if (strval_eq(reader
, name
, &apos
))
2028 if (strval_eq(reader
, name
, "
)) return '\"';
2037 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2038 [67] Reference ::= EntityRef | CharRef
2039 [68] EntityRef ::= '&' Name ';' */
2040 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2042 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2043 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2044 UINT cur
= reader_get_cur(reader
);
2049 reader_skipn(reader
, 1);
2050 ptr
= reader_get_ptr(reader
);
2054 reader_skipn(reader
, 1);
2055 ptr
= reader_get_ptr(reader
);
2057 /* hex char or decimal */
2060 reader_skipn(reader
, 1);
2061 ptr
= reader_get_ptr(reader
);
2065 if ((*ptr
>= '0' && *ptr
<= '9'))
2066 ch
= ch
*16 + *ptr
- '0';
2067 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2068 ch
= ch
*16 + *ptr
- 'a' + 10;
2069 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2070 ch
= ch
*16 + *ptr
- 'A' + 10;
2072 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2073 reader_skipn(reader
, 1);
2074 ptr
= reader_get_ptr(reader
);
2081 if ((*ptr
>= '0' && *ptr
<= '9'))
2083 ch
= ch
*10 + *ptr
- '0';
2084 reader_skipn(reader
, 1);
2085 ptr
= reader_get_ptr(reader
);
2088 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2092 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2095 if (is_wchar_space(ch
)) ch
= ' ';
2097 ptr
= reader_get_ptr(reader
);
2098 start
= reader_get_ptr2(reader
, cur
);
2099 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2100 memmove(start
+ 1, ptr
+ 1, len
);
2102 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2103 buffer
->cur
= cur
+ 1;
2112 hr
= reader_parse_name(reader
, &name
);
2113 if (FAILED(hr
)) return hr
;
2115 ptr
= reader_get_ptr(reader
);
2116 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2118 /* predefined entities resolve to a single character */
2119 ch
= get_predefined_entity(reader
, &name
);
2122 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2123 memmove(start
+1, ptr
+1, len
);
2124 buffer
->cur
= cur
+ 1;
2130 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2131 return WC_E_UNDECLAREDENTITY
;
2139 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2140 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2145 ptr
= reader_get_ptr(reader
);
2147 /* skip opening quote */
2149 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2150 reader_skipn(reader
, 1);
2152 ptr
= reader_get_ptr(reader
);
2153 start
= reader_get_cur(reader
);
2156 if (*ptr
== '<') return WC_E_LESSTHAN
;
2160 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2161 /* skip closing quote */
2162 reader_skipn(reader
, 1);
2168 HRESULT hr
= reader_parse_reference(reader
);
2169 if (FAILED(hr
)) return hr
;
2173 reader_normalize_space(reader
, ptr
);
2174 reader_skipn(reader
, 1);
2176 ptr
= reader_get_ptr(reader
);
2182 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2183 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2184 [3 NS] DefaultAttName ::= 'xmlns'
2185 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2186 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2188 struct reader_position position
= reader
->position
;
2189 strval prefix
, local
, qname
, value
;
2190 enum attribute_flags flags
= 0;
2193 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2194 if (FAILED(hr
)) return hr
;
2196 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2197 flags
|= ATTRIBUTE_NS_DEFINITION
;
2199 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2200 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2202 hr
= reader_parse_eq(reader
);
2203 if (FAILED(hr
)) return hr
;
2205 hr
= reader_parse_attvalue(reader
, &value
);
2206 if (FAILED(hr
)) return hr
;
2208 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2209 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2211 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2212 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2215 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2216 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2217 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2219 struct reader_position position
= reader
->position
;
2222 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2223 if (FAILED(hr
)) return hr
;
2227 static const WCHAR endW
[] = {'/','>',0};
2229 reader_skipspaces(reader
);
2232 if ((reader
->is_empty_element
= !reader_cmp(reader
, endW
)))
2234 struct element
*element
= &reader
->empty_element
;
2237 reader_skipn(reader
, 2);
2239 reader_free_strvalued(reader
, &element
->qname
);
2240 reader_free_strvalued(reader
, &element
->localname
);
2242 element
->prefix
= *prefix
;
2243 reader_strvaldup(reader
, qname
, &element
->qname
);
2244 reader_strvaldup(reader
, local
, &element
->localname
);
2245 element
->position
= position
;
2246 reader_mark_ns_nodes(reader
, element
);
2250 /* got a start tag */
2251 if (!reader_cmp(reader
, gtW
))
2254 reader_skipn(reader
, 1);
2255 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2258 hr
= reader_parse_attribute(reader
);
2259 if (FAILED(hr
)) return hr
;
2265 /* [39] element ::= EmptyElemTag | STag content ETag */
2266 static HRESULT
reader_parse_element(xmlreader
*reader
)
2270 switch (reader
->resumestate
)
2272 case XmlReadResumeState_Initial
:
2273 /* check if we are really on element */
2274 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2277 reader_skipn(reader
, 1);
2279 reader_shrink(reader
);
2280 reader
->resumestate
= XmlReadResumeState_STag
;
2281 case XmlReadResumeState_STag
:
2283 strval qname
, prefix
, local
;
2285 /* this handles empty elements too */
2286 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2287 if (FAILED(hr
)) return hr
;
2289 /* FIXME: need to check for defined namespace to reject invalid prefix */
2291 /* if we got empty element and stack is empty go straight to Misc */
2292 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2293 reader
->instate
= XmlReadInState_MiscEnd
;
2295 reader
->instate
= XmlReadInState_Content
;
2297 reader
->nodetype
= XmlNodeType_Element
;
2298 reader
->resumestate
= XmlReadResumeState_Initial
;
2299 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2300 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2301 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2302 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2312 /* [13 NS] ETag ::= '</' QName S? '>' */
2313 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2315 struct reader_position position
;
2316 strval prefix
, local
, qname
;
2317 struct element
*element
;
2321 reader_skipn(reader
, 2);
2323 position
= reader
->position
;
2324 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2325 if (FAILED(hr
)) return hr
;
2327 reader_skipspaces(reader
);
2329 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2332 reader_skipn(reader
, 1);
2334 /* Element stack should never be empty at this point, cause we shouldn't get to
2335 content parsing if it's empty. */
2336 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2337 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2339 /* update position stored for start tag, we won't be using it */
2340 element
->position
= position
;
2342 reader
->nodetype
= XmlNodeType_EndElement
;
2343 reader
->is_empty_element
= FALSE
;
2344 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2349 /* [18] CDSect ::= CDStart CData CDEnd
2350 [19] CDStart ::= '<![CDATA['
2351 [20] CData ::= (Char* - (Char* ']]>' Char*))
2352 [21] CDEnd ::= ']]>' */
2353 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2358 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2360 start
= reader
->resume
[XmlReadResume_Body
];
2361 ptr
= reader_get_ptr(reader
);
2365 /* skip markup '<![CDATA[' */
2366 reader_skipn(reader
, 9);
2367 reader_shrink(reader
);
2368 ptr
= reader_get_ptr(reader
);
2369 start
= reader_get_cur(reader
);
2370 reader
->nodetype
= XmlNodeType_CDATA
;
2371 reader
->resume
[XmlReadResume_Body
] = start
;
2372 reader
->resumestate
= XmlReadResumeState_CDATA
;
2373 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2378 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2382 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2385 reader_skipn(reader
, 3);
2386 TRACE("%s\n", debug_strval(reader
, &value
));
2388 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2389 reader
->resume
[XmlReadResume_Body
] = 0;
2390 reader
->resumestate
= XmlReadResumeState_Initial
;
2395 /* Value normalization is not fully implemented, rules are:
2397 - single '\r' -> '\n';
2398 - sequence '\r\n' -> '\n', in this case value length changes;
2400 if (*ptr
== '\r') *ptr
= '\n';
2401 reader_skipn(reader
, 1);
2409 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2410 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2412 struct reader_position position
;
2416 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2418 start
= reader
->resume
[XmlReadResume_Body
];
2419 ptr
= reader_get_ptr(reader
);
2423 reader_shrink(reader
);
2424 ptr
= reader_get_ptr(reader
);
2425 start
= reader_get_cur(reader
);
2426 /* There's no text */
2427 if (!*ptr
|| *ptr
== '<') return S_OK
;
2428 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2429 reader
->resume
[XmlReadResume_Body
] = start
;
2430 reader
->resumestate
= XmlReadResumeState_CharData
;
2431 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2434 position
= reader
->position
;
2437 static const WCHAR ampW
[] = {'&',0};
2439 /* CDATA closing sequence ']]>' is not allowed */
2440 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2441 return WC_E_CDSECTEND
;
2443 /* Found next markup part */
2448 reader
->empty_element
.position
= position
;
2449 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2450 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2451 reader
->resume
[XmlReadResume_Body
] = 0;
2452 reader
->resumestate
= XmlReadResumeState_Initial
;
2456 /* this covers a case when text has leading whitespace chars */
2457 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2459 if (!reader_cmp(reader
, ampW
))
2460 reader_parse_reference(reader
);
2462 reader_skipn(reader
, 1);
2464 ptr
= reader_get_ptr(reader
);
2470 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2471 static HRESULT
reader_parse_content(xmlreader
*reader
)
2473 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2474 static const WCHAR etagW
[] = {'<','/',0};
2476 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2478 switch (reader
->resumestate
)
2480 case XmlReadResumeState_CDATA
:
2481 return reader_parse_cdata(reader
);
2482 case XmlReadResumeState_Comment
:
2483 return reader_parse_comment(reader
);
2484 case XmlReadResumeState_PIBody
:
2485 case XmlReadResumeState_PITarget
:
2486 return reader_parse_pi(reader
);
2487 case XmlReadResumeState_CharData
:
2488 return reader_parse_chardata(reader
);
2490 ERR("unknown resume state %d\n", reader
->resumestate
);
2494 reader_shrink(reader
);
2496 /* handle end tag here, it indicates end of content as well */
2497 if (!reader_cmp(reader
, etagW
))
2498 return reader_parse_endtag(reader
);
2500 if (!reader_cmp(reader
, commentW
))
2501 return reader_parse_comment(reader
);
2503 if (!reader_cmp(reader
, piW
))
2504 return reader_parse_pi(reader
);
2506 if (!reader_cmp(reader
, cdstartW
))
2507 return reader_parse_cdata(reader
);
2509 if (!reader_cmp(reader
, ltW
))
2510 return reader_parse_element(reader
);
2512 /* what's left must be CharData */
2513 return reader_parse_chardata(reader
);
2516 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2518 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2521 if (!is_reader_pending(reader
))
2522 reader_clear_attrs(reader
);
2524 /* When moving from EndElement or empty element, pop its own namespace definitions */
2527 case XmlNodeType_Attribute
:
2528 reader_dec_depth(reader
);
2530 case XmlNodeType_Element
:
2531 if (reader
->is_empty_element
)
2532 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2533 else if (FAILED(hr
= reader_inc_depth(reader
)))
2536 case XmlNodeType_EndElement
:
2537 reader_pop_element(reader
);
2538 reader_dec_depth(reader
);
2546 switch (reader
->instate
)
2548 /* if it's a first call for a new input we need to detect stream encoding */
2549 case XmlReadInState_Initial
:
2553 hr
= readerinput_growraw(reader
->input
);
2554 if (FAILED(hr
)) return hr
;
2556 reader
->position
.line_number
= 1;
2557 reader
->position
.line_position
= 1;
2559 /* try to detect encoding by BOM or data and set input code page */
2560 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2561 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2562 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2563 if (FAILED(hr
)) return hr
;
2565 /* always switch first time cause we have to put something in */
2566 readerinput_switchencoding(reader
->input
, enc
);
2568 /* parse xml declaration */
2569 hr
= reader_parse_xmldecl(reader
);
2570 if (FAILED(hr
)) return hr
;
2572 readerinput_shrinkraw(reader
->input
, -1);
2573 reader
->instate
= XmlReadInState_Misc_DTD
;
2574 if (hr
== S_OK
) return hr
;
2577 case XmlReadInState_Misc_DTD
:
2578 hr
= reader_parse_misc(reader
);
2579 if (FAILED(hr
)) return hr
;
2582 reader
->instate
= XmlReadInState_DTD
;
2586 case XmlReadInState_DTD
:
2587 hr
= reader_parse_dtd(reader
);
2588 if (FAILED(hr
)) return hr
;
2592 reader
->instate
= XmlReadInState_DTD_Misc
;
2596 reader
->instate
= XmlReadInState_Element
;
2598 case XmlReadInState_DTD_Misc
:
2599 hr
= reader_parse_misc(reader
);
2600 if (FAILED(hr
)) return hr
;
2603 reader
->instate
= XmlReadInState_Element
;
2607 case XmlReadInState_Element
:
2608 return reader_parse_element(reader
);
2609 case XmlReadInState_Content
:
2610 return reader_parse_content(reader
);
2611 case XmlReadInState_MiscEnd
:
2612 hr
= reader_parse_misc(reader
);
2613 if (FAILED(hr
)) return hr
;
2617 reader
->instate
= XmlReadInState_Eof
;
2618 reader
->state
= XmlReadState_EndOfFile
;
2619 reader
->nodetype
= XmlNodeType_None
;
2622 case XmlReadInState_Eof
:
2625 FIXME("internal state %d not handled\n", reader
->instate
);
2633 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2635 xmlreader
*This
= impl_from_IXmlReader(iface
);
2637 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2639 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2640 IsEqualGUID(riid
, &IID_IXmlReader
))
2646 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2648 return E_NOINTERFACE
;
2651 IXmlReader_AddRef(iface
);
2656 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2658 xmlreader
*This
= impl_from_IXmlReader(iface
);
2659 ULONG ref
= InterlockedIncrement(&This
->ref
);
2660 TRACE("(%p)->(%d)\n", This
, ref
);
2664 static void reader_clear_ns(xmlreader
*reader
)
2666 struct ns
*ns
, *ns2
;
2668 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2669 reader_free_strvalued(reader
, &ns
->prefix
);
2670 reader_free_strvalued(reader
, &ns
->uri
);
2671 reader_free(reader
, ns
);
2674 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2675 reader_free_strvalued(reader
, &ns
->uri
);
2676 reader_free(reader
, ns
);
2680 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2682 xmlreader
*This
= impl_from_IXmlReader(iface
);
2683 LONG ref
= InterlockedDecrement(&This
->ref
);
2685 TRACE("(%p)->(%d)\n", This
, ref
);
2689 IMalloc
*imalloc
= This
->imalloc
;
2690 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2691 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2692 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2693 reader_clear_attrs(This
);
2694 reader_clear_ns(This
);
2695 reader_clear_elements(This
);
2696 reader_free_strvalues(This
);
2697 reader_free(This
, This
);
2698 if (imalloc
) IMalloc_Release(imalloc
);
2704 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2706 xmlreader
*This
= impl_from_IXmlReader(iface
);
2707 IXmlReaderInput
*readerinput
;
2710 TRACE("(%p)->(%p)\n", This
, input
);
2714 readerinput_release_stream(This
->input
);
2715 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2719 This
->position
.line_number
= 0;
2720 This
->position
.line_position
= 0;
2721 reader_clear_elements(This
);
2723 This
->nodetype
= XmlNodeType_None
;
2724 This
->resumestate
= XmlReadResumeState_Initial
;
2725 memset(This
->resume
, 0, sizeof(This
->resume
));
2727 /* just reset current input */
2730 This
->state
= XmlReadState_Initial
;
2734 /* now try IXmlReaderInput, ISequentialStream, IStream */
2735 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2738 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2739 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2742 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2743 readerinput
, readerinput
->lpVtbl
);
2744 IUnknown_Release(readerinput
);
2750 if (hr
!= S_OK
|| !readerinput
)
2752 /* create IXmlReaderInput basing on supplied interface */
2753 hr
= CreateXmlReaderInputWithEncodingName(input
,
2754 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2755 if (hr
!= S_OK
) return hr
;
2756 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2759 /* set stream for supplied IXmlReaderInput */
2760 hr
= readerinput_query_for_stream(This
->input
);
2763 This
->state
= XmlReadState_Initial
;
2764 This
->instate
= XmlReadInState_Initial
;
2770 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2772 xmlreader
*This
= impl_from_IXmlReader(iface
);
2774 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2776 if (!value
) return E_INVALIDARG
;
2780 case XmlReaderProperty_MultiLanguage
:
2781 *value
= (LONG_PTR
)This
->mlang
;
2783 IUnknown_AddRef(This
->mlang
);
2785 case XmlReaderProperty_XmlResolver
:
2786 *value
= (LONG_PTR
)This
->resolver
;
2788 IXmlResolver_AddRef(This
->resolver
);
2790 case XmlReaderProperty_DtdProcessing
:
2791 *value
= This
->dtdmode
;
2793 case XmlReaderProperty_ReadState
:
2794 *value
= This
->state
;
2796 case XmlReaderProperty_MaxElementDepth
:
2797 *value
= This
->max_depth
;
2800 FIXME("Unimplemented property (%u)\n", property
);
2807 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2809 xmlreader
*This
= impl_from_IXmlReader(iface
);
2811 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2815 case XmlReaderProperty_MultiLanguage
:
2817 IUnknown_Release(This
->mlang
);
2818 This
->mlang
= (IUnknown
*)value
;
2820 IUnknown_AddRef(This
->mlang
);
2822 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2824 case XmlReaderProperty_XmlResolver
:
2826 IXmlResolver_Release(This
->resolver
);
2827 This
->resolver
= (IXmlResolver
*)value
;
2829 IXmlResolver_AddRef(This
->resolver
);
2831 case XmlReaderProperty_DtdProcessing
:
2832 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2833 This
->dtdmode
= value
;
2835 case XmlReaderProperty_MaxElementDepth
:
2836 This
->max_depth
= value
;
2839 FIXME("Unimplemented property (%u)\n", property
);
2846 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2848 xmlreader
*This
= impl_from_IXmlReader(iface
);
2849 XmlNodeType oldtype
= This
->nodetype
;
2853 TRACE("(%p)->(%p)\n", This
, nodetype
);
2858 switch (This
->state
)
2860 case XmlReadState_Closed
:
2863 case XmlReadState_Error
:
2867 hr
= reader_parse_nextnode(This
);
2868 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2869 This
->state
= XmlReadState_Interactive
;
2873 This
->state
= XmlReadState_Error
;
2874 This
->nodetype
= XmlNodeType_None
;
2880 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2881 *nodetype
= This
->nodetype
;
2886 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2888 xmlreader
*This
= impl_from_IXmlReader(iface
);
2890 TRACE("(%p)->(%p)\n", This
, node_type
);
2893 return E_INVALIDARG
;
2895 *node_type
= reader_get_nodetype(This
);
2896 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2899 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2901 if (!reader
->attr_count
)
2905 reader_inc_depth(reader
);
2907 reader
->attr
= LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
);
2908 reader_set_strvalue(reader
, StringValue_Prefix
, &reader
->attr
->prefix
);
2909 reader_set_strvalue(reader
, StringValue_LocalName
, &reader
->attr
->localname
);
2910 reader_set_strvalue(reader
, StringValue_QualifiedName
, &reader
->attr
->qname
);
2911 reader_set_strvalue(reader
, StringValue_Value
, &reader
->attr
->value
);
2916 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2918 xmlreader
*This
= impl_from_IXmlReader(iface
);
2920 TRACE("(%p)\n", This
);
2922 return reader_move_to_first_attribute(This
);
2925 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2927 xmlreader
*This
= impl_from_IXmlReader(iface
);
2928 const struct list
*next
;
2930 TRACE("(%p)\n", This
);
2932 if (!This
->attr_count
) return S_FALSE
;
2935 return reader_move_to_first_attribute(This
);
2937 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2940 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2941 reader_set_strvalue(This
, StringValue_Prefix
, &This
->attr
->prefix
);
2942 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2943 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->attr
->qname
);
2944 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2947 return next
? S_OK
: S_FALSE
;
2950 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2952 LPCWSTR namespaceUri
)
2954 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2958 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2960 xmlreader
*This
= impl_from_IXmlReader(iface
);
2962 TRACE("(%p)\n", This
);
2964 if (!This
->attr_count
) return S_FALSE
;
2967 reader_dec_depth(This
);
2971 /* FIXME: support other node types with 'attributes' like DTD */
2972 if (This
->is_empty_element
) {
2973 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
2974 reader_set_strvalue(This
, StringValue_LocalName
, &This
->empty_element
.localname
);
2975 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
2978 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2980 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
2981 reader_set_strvalue(This
, StringValue_LocalName
, &element
->localname
);
2982 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
2985 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
2990 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2992 xmlreader
*This
= impl_from_IXmlReader(iface
);
2993 struct attribute
*attribute
= This
->attr
;
2994 struct element
*element
;
2997 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3002 switch (reader_get_nodetype(This
))
3004 case XmlNodeType_Text
:
3005 case XmlNodeType_CDATA
:
3006 case XmlNodeType_Comment
:
3007 case XmlNodeType_Whitespace
:
3011 case XmlNodeType_Element
:
3012 case XmlNodeType_EndElement
:
3013 element
= reader_get_element(This
);
3014 if (element
->prefix
.len
)
3016 *name
= element
->qname
.str
;
3017 *len
= element
->qname
.len
;
3021 *name
= element
->localname
.str
;
3022 *len
= element
->localname
.len
;
3025 case XmlNodeType_Attribute
:
3026 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3030 } else if (attribute
->prefix
.len
)
3032 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3033 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3037 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3038 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3042 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3043 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3050 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
3052 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
3055 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
3056 if (strval_eq(reader
, prefix
, &ns
->prefix
))
3063 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3065 if (list_empty(&reader
->nsdef
))
3068 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
3071 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3073 xmlreader
*This
= impl_from_IXmlReader(iface
);
3074 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3075 XmlNodeType nodetype
;
3079 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3087 switch ((nodetype
= reader_get_nodetype(This
)))
3089 case XmlNodeType_Attribute
:
3091 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3092 '2','0','0','0','/','x','m','l','n','s','/',0};
3093 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3094 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3095 const strval
*local
= &This
->strvalues
[StringValue_LocalName
];
3097 /* check for reserved prefixes first */
3098 if ((strval_eq(This
, prefix
, &strval_empty
) && strval_eq(This
, local
, &strval_xmlns
)) ||
3099 strval_eq(This
, prefix
, &strval_xmlns
))
3102 *len
= sizeof(xmlns_uriW
)/sizeof(xmlns_uriW
[0]) - 1;
3104 else if (strval_eq(This
, prefix
, &strval_xml
)) {
3106 *len
= sizeof(xml_uriW
)/sizeof(xml_uriW
[0]) - 1;
3110 ns
= reader_lookup_ns(This
, prefix
);
3122 case XmlNodeType_Element
:
3123 case XmlNodeType_EndElement
:
3125 ns
= reader_lookup_ns(This
, prefix
);
3127 /* pick top default ns if any */
3129 ns
= reader_lookup_nsdef(This
);
3141 case XmlNodeType_Text
:
3142 case XmlNodeType_CDATA
:
3143 case XmlNodeType_ProcessingInstruction
:
3144 case XmlNodeType_Comment
:
3145 case XmlNodeType_Whitespace
:
3146 case XmlNodeType_XmlDeclaration
:
3151 FIXME("Unhandled node type %d\n", nodetype
);
3158 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3160 xmlreader
*This
= impl_from_IXmlReader(iface
);
3161 struct element
*element
;
3164 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3169 switch (reader_get_nodetype(This
))
3171 case XmlNodeType_Text
:
3172 case XmlNodeType_CDATA
:
3173 case XmlNodeType_Comment
:
3174 case XmlNodeType_Whitespace
:
3178 case XmlNodeType_Element
:
3179 case XmlNodeType_EndElement
:
3180 element
= reader_get_element(This
);
3181 *name
= element
->localname
.str
;
3182 *len
= element
->localname
.len
;
3184 case XmlNodeType_Attribute
:
3185 if (This
->attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3192 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3193 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3197 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3198 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3205 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3207 xmlreader
*This
= impl_from_IXmlReader(iface
);
3208 XmlNodeType nodetype
;
3211 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3219 switch ((nodetype
= reader_get_nodetype(This
)))
3221 case XmlNodeType_Element
:
3222 case XmlNodeType_EndElement
:
3223 case XmlNodeType_Attribute
:
3225 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3228 if (strval_eq(This
, prefix
, &strval_xml
))
3233 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3238 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3240 *ret
= ns
->prefix
.str
;
3241 *len
= ns
->prefix
.len
;
3253 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3255 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3256 strval
*val
= &reader
->strvalues
[StringValue_Value
];
3259 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3265 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3270 hr
= IXmlReader_Read(iface
, &type
);
3271 if (FAILED(hr
)) return hr
;
3273 /* return if still pending, partially read values are not reported */
3274 if (is_reader_pending(reader
)) return E_PENDING
;
3277 switch (reader_get_nodetype(reader
))
3279 case XmlNodeType_XmlDeclaration
:
3283 case XmlNodeType_Attribute
:
3285 const strval
*local
= &reader
->strvalues
[StringValue_LocalName
];
3287 /* For namespace definition attributes return values from namespace list */
3288 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3292 if (!(ns
= reader_lookup_ns(reader
, local
)))
3293 ns
= reader_lookup_nsdef(reader
);
3295 *value
= ns
->uri
.str
;
3304 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3305 if (!ptr
) return E_OUTOFMEMORY
;
3306 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3318 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3320 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3321 strval
*val
= &reader
->strvalues
[StringValue_Value
];
3324 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3326 /* Value is already allocated, chunked reads are not possible. */
3327 if (val
->str
) return S_FALSE
;
3331 len
= min(chunk_size
, val
->len
);
3332 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
3335 if (read
) *read
= len
;
3341 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3343 UINT
*baseUri_length
)
3345 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
3349 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3351 FIXME("(%p): stub\n", iface
);
3355 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3357 xmlreader
*This
= impl_from_IXmlReader(iface
);
3358 TRACE("(%p)\n", This
);
3359 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3360 when current node is start tag of an element */
3361 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3364 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3366 xmlreader
*This
= impl_from_IXmlReader(iface
);
3367 const struct element
*element
;
3369 TRACE("(%p %p)\n", This
, line_number
);
3372 return E_INVALIDARG
;
3374 switch (reader_get_nodetype(This
))
3376 case XmlNodeType_Element
:
3377 case XmlNodeType_EndElement
:
3378 element
= reader_get_element(This
);
3379 *line_number
= element
->position
.line_number
;
3381 case XmlNodeType_Attribute
:
3382 *line_number
= This
->attr
->position
.line_number
;
3384 case XmlNodeType_Whitespace
:
3385 case XmlNodeType_XmlDeclaration
:
3386 *line_number
= This
->empty_element
.position
.line_number
;
3389 *line_number
= This
->position
.line_number
;
3393 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3396 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3398 xmlreader
*This
= impl_from_IXmlReader(iface
);
3399 const struct element
*element
;
3401 TRACE("(%p %p)\n", This
, line_position
);
3404 return E_INVALIDARG
;
3406 switch (reader_get_nodetype(This
))
3408 case XmlNodeType_Element
:
3409 case XmlNodeType_EndElement
:
3410 element
= reader_get_element(This
);
3411 *line_position
= element
->position
.line_position
;
3413 case XmlNodeType_Attribute
:
3414 *line_position
= This
->attr
->position
.line_position
;
3416 case XmlNodeType_Whitespace
:
3417 case XmlNodeType_XmlDeclaration
:
3418 *line_position
= This
->empty_element
.position
.line_position
;
3421 *line_position
= This
->position
.line_position
;
3425 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3428 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3430 xmlreader
*This
= impl_from_IXmlReader(iface
);
3432 TRACE("(%p)->(%p)\n", This
, count
);
3434 if (!count
) return E_INVALIDARG
;
3436 *count
= This
->attr_count
;
3440 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3442 xmlreader
*This
= impl_from_IXmlReader(iface
);
3443 TRACE("(%p)->(%p)\n", This
, depth
);
3444 *depth
= This
->depth
;
3448 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3450 xmlreader
*This
= impl_from_IXmlReader(iface
);
3451 TRACE("(%p)\n", iface
);
3452 return This
->state
== XmlReadState_EndOfFile
;
3455 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3457 xmlreader_QueryInterface
,
3461 xmlreader_GetProperty
,
3462 xmlreader_SetProperty
,
3464 xmlreader_GetNodeType
,
3465 xmlreader_MoveToFirstAttribute
,
3466 xmlreader_MoveToNextAttribute
,
3467 xmlreader_MoveToAttributeByName
,
3468 xmlreader_MoveToElement
,
3469 xmlreader_GetQualifiedName
,
3470 xmlreader_GetNamespaceUri
,
3471 xmlreader_GetLocalName
,
3472 xmlreader_GetPrefix
,
3474 xmlreader_ReadValueChunk
,
3475 xmlreader_GetBaseUri
,
3476 xmlreader_IsDefault
,
3477 xmlreader_IsEmptyElement
,
3478 xmlreader_GetLineNumber
,
3479 xmlreader_GetLinePosition
,
3480 xmlreader_GetAttributeCount
,
3485 /** IXmlReaderInput **/
3486 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3488 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3490 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3492 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3493 IsEqualGUID(riid
, &IID_IUnknown
))
3499 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3501 return E_NOINTERFACE
;
3504 IUnknown_AddRef(iface
);
3509 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3511 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3512 ULONG ref
= InterlockedIncrement(&This
->ref
);
3513 TRACE("(%p)->(%d)\n", This
, ref
);
3517 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3519 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3520 LONG ref
= InterlockedDecrement(&This
->ref
);
3522 TRACE("(%p)->(%d)\n", This
, ref
);
3526 IMalloc
*imalloc
= This
->imalloc
;
3527 if (This
->input
) IUnknown_Release(This
->input
);
3528 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3529 if (This
->buffer
) free_input_buffer(This
->buffer
);
3530 readerinput_free(This
, This
->baseuri
);
3531 readerinput_free(This
, This
);
3532 if (imalloc
) IMalloc_Release(imalloc
);
3538 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3540 xmlreaderinput_QueryInterface
,
3541 xmlreaderinput_AddRef
,
3542 xmlreaderinput_Release
3545 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3550 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3552 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
3554 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
3559 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3561 reader
= heap_alloc(sizeof(*reader
));
3563 return E_OUTOFMEMORY
;
3565 memset(reader
, 0, sizeof(*reader
));
3566 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3568 reader
->state
= XmlReadState_Closed
;
3569 reader
->instate
= XmlReadInState_Initial
;
3570 reader
->resumestate
= XmlReadResumeState_Initial
;
3571 reader
->dtdmode
= DtdProcessing_Prohibit
;
3572 reader
->imalloc
= imalloc
;
3573 if (imalloc
) IMalloc_AddRef(imalloc
);
3574 reader
->nodetype
= XmlNodeType_None
;
3575 list_init(&reader
->attrs
);
3576 list_init(&reader
->nsdef
);
3577 list_init(&reader
->ns
);
3578 list_init(&reader
->elements
);
3579 reader
->max_depth
= 256;
3581 for (i
= 0; i
< StringValue_Last
; i
++)
3582 reader
->strvalues
[i
] = strval_empty
;
3584 *obj
= &reader
->IXmlReader_iface
;
3586 TRACE("returning iface %p\n", *obj
);
3591 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3596 IXmlReaderInput
**ppInput
)
3598 xmlreaderinput
*readerinput
;
3601 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3602 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3604 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3607 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3609 readerinput
= heap_alloc(sizeof(*readerinput
));
3610 if(!readerinput
) return E_OUTOFMEMORY
;
3612 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3613 readerinput
->ref
= 1;
3614 readerinput
->imalloc
= imalloc
;
3615 readerinput
->stream
= NULL
;
3616 if (imalloc
) IMalloc_AddRef(imalloc
);
3617 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3618 readerinput
->hint
= hint
;
3619 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3620 readerinput
->pending
= 0;
3622 hr
= alloc_input_buffer(readerinput
);
3625 readerinput_free(readerinput
, readerinput
->baseuri
);
3626 readerinput_free(readerinput
, readerinput
);
3627 if (imalloc
) IMalloc_Release(imalloc
);
3630 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3632 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3634 TRACE("returning iface %p\n", *ppInput
);