2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 BOOL
is_namestartchar(WCHAR ch
);
88 static const char *debugstr_nodetype(XmlNodeType nodetype
)
90 static const char * const type_names
[] =
99 "ProcessingInstruction",
112 if (nodetype
> _XmlNodeType_Last
)
113 return wine_dbg_sprintf("unknown type=%d", nodetype
);
115 return type_names
[nodetype
];
118 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
120 static const char * const prop_names
[] =
132 if (prop
> _XmlReaderProperty_Last
)
133 return wine_dbg_sprintf("unknown property=%d", prop
);
135 return prop_names
[prop
];
138 struct xml_encoding_data
145 static const struct xml_encoding_data xml_encoding_map
[] =
147 { L
"US-ASCII", XmlEncoding_USASCII
, 20127 },
148 { L
"UTF-16", XmlEncoding_UTF16
, 1200 },
149 { L
"UTF-8", XmlEncoding_UTF8
, CP_UTF8
},
152 const WCHAR
*get_encoding_name(xml_encoding encoding
)
154 return xml_encoding_map
[encoding
].name
;
157 xml_encoding
get_encoding_from_codepage(UINT codepage
)
160 for (i
= 0; i
< ARRAY_SIZE(xml_encoding_map
); i
++)
162 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
164 return XmlEncoding_Unknown
;
171 unsigned int allocated
;
172 unsigned int written
;
176 typedef struct input_buffer input_buffer
;
180 IXmlReaderInput IXmlReaderInput_iface
;
182 /* reference passed on IXmlReaderInput creation, is kept when input is created */
185 xml_encoding encoding
;
188 /* stream reference set after SetInput() call from reader,
189 stored as a sequential stream because the optimizations
190 possible with IStream aren't currently implemented */
191 ISequentialStream
*stream
;
192 input_buffer
*buffer
;
193 unsigned int pending
: 1;
196 static const struct IUnknownVtbl xmlreaderinputvtbl
;
198 /* Structure to hold parsed string of specific length.
200 Reader stores node value as 'start' pointer, on request
201 a null-terminated version of it is allocated.
203 To init a strval variable use reader_init_strval(),
204 to set strval as a reader value use reader_set_strval().
208 WCHAR
*str
; /* allocated null-terminated string */
209 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
210 UINT start
; /* input position where value starts */
213 static WCHAR emptyW
[] = L
"";
214 static WCHAR xmlW
[] = L
"xml";
215 static WCHAR xmlnsW
[] = L
"xmlns";
216 static const strval strval_empty
= { emptyW
, 0 };
217 static const strval strval_xml
= { xmlW
, 3 };
218 static const strval strval_xmlns
= { xmlnsW
, 5 };
220 struct reader_position
228 ATTRIBUTE_NS_DEFINITION
= 0x1,
229 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
239 struct reader_position position
;
249 struct reader_position position
;
257 struct element
*element
;
262 IXmlReader IXmlReader_iface
;
264 xmlreaderinput
*input
;
267 HRESULT error
; /* error set on XmlReadState_Error */
268 XmlReaderInternalState instate
;
269 XmlReaderResumeState resumestate
;
270 XmlNodeType nodetype
;
271 DtdProcessing dtdmode
;
272 IXmlResolver
*resolver
;
274 struct reader_position position
;
275 struct list attrs
; /* attributes list for current node */
276 struct attribute
*attr
; /* current attribute */
280 struct list elements
;
282 strval strvalues
[StringValue_Last
];
285 BOOL is_empty_element
;
286 struct element empty_element
; /* used for empty elements without end tag <a />,
287 and to keep <?xml reader position */
288 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
293 encoded_buffer utf16
;
294 encoded_buffer encoded
;
296 xmlreaderinput
*input
;
299 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
301 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
304 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
306 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
309 /* reader memory allocation functions */
310 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
312 return m_alloc(reader
->imalloc
, len
);
315 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
317 void *ret
= reader_alloc(reader
, len
);
323 static inline void reader_free(xmlreader
*reader
, void *mem
)
325 m_free(reader
->imalloc
, mem
);
328 /* Just return pointer from offset, no attempt to read more. */
329 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
331 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
332 return (WCHAR
*)buffer
->data
+ offset
;
335 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
337 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
340 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
344 if (src
->str
!= strval_empty
.str
)
346 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
347 if (!dest
->str
) return E_OUTOFMEMORY
;
348 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
349 dest
->str
[dest
->len
] = 0;
356 /* reader input memory allocation functions */
357 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
359 return m_alloc(input
->imalloc
, len
);
362 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
364 return m_realloc(input
->imalloc
, mem
, len
);
367 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
369 m_free(input
->imalloc
, mem
);
372 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
379 size
= (lstrlenW(str
)+1)*sizeof(WCHAR
);
380 ret
= readerinput_alloc(input
, size
);
381 if (ret
) memcpy(ret
, str
, size
);
387 /* This one frees stored string value if needed */
388 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
390 if (v
->str
!= strval_empty
.str
)
392 reader_free(reader
, v
->str
);
397 static void reader_clear_attrs(xmlreader
*reader
)
399 struct attribute
*attr
, *attr2
;
400 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
402 reader_free_strvalued(reader
, &attr
->localname
);
403 reader_free_strvalued(reader
, &attr
->value
);
404 reader_free(reader
, attr
);
406 list_init(&reader
->attrs
);
407 reader
->attr_count
= 0;
411 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
412 while we are on a node with attributes */
413 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
414 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
416 struct attribute
*attr
;
419 attr
= reader_alloc(reader
, sizeof(*attr
));
420 if (!attr
) return E_OUTOFMEMORY
;
422 hr
= reader_strvaldup(reader
, localname
, &attr
->localname
);
425 hr
= reader_strvaldup(reader
, value
, &attr
->value
);
427 reader_free_strvalued(reader
, &attr
->value
);
431 reader_free(reader
, attr
);
436 attr
->prefix
= *prefix
;
438 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
439 attr
->qname
= qname
? *qname
: *localname
;
440 attr
->position
= *position
;
442 list_add_tail(&reader
->attrs
, &attr
->entry
);
443 reader
->attr_count
++;
448 /* Returns current element, doesn't check if reader is actually positioned on it. */
449 static struct element
*reader_get_element(xmlreader
*reader
)
451 if (reader
->is_empty_element
)
452 return &reader
->empty_element
;
454 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
457 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
464 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
466 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
469 /* used to initialize from constant string */
470 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
477 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
479 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
482 static void reader_free_strvalues(xmlreader
*reader
)
485 for (type
= 0; type
< StringValue_Last
; type
++)
486 reader_free_strvalue(reader
, type
);
489 /* This helper should only be used to test if strings are the same,
490 it doesn't try to sort. */
491 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
493 if (str1
->len
!= str2
->len
) return 0;
494 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
497 static void reader_clear_elements(xmlreader
*reader
)
499 struct element
*elem
, *elem2
;
500 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
502 reader_free_strvalued(reader
, &elem
->prefix
);
503 reader_free_strvalued(reader
, &elem
->localname
);
504 reader_free_strvalued(reader
, &elem
->qname
);
505 reader_free(reader
, elem
);
507 list_init(&reader
->elements
);
508 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
509 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
510 reader
->is_empty_element
= FALSE
;
513 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
515 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
518 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
519 if (strval_eq(reader
, prefix
, &ns
->prefix
))
526 static HRESULT
reader_inc_depth(xmlreader
*reader
)
528 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
531 static void reader_dec_depth(xmlreader
*reader
)
537 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
542 ns
= reader_alloc(reader
, sizeof(*ns
));
543 if (!ns
) return E_OUTOFMEMORY
;
546 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
548 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
550 reader_free(reader
, ns
);
555 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
557 reader_free_strvalued(reader
, &ns
->prefix
);
558 reader_free(reader
, ns
);
563 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
567 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
569 reader_free_strvalued(reader
, &element
->prefix
);
570 reader_free_strvalued(reader
, &element
->localname
);
571 reader_free_strvalued(reader
, &element
->qname
);
572 reader_free(reader
, element
);
575 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
579 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
582 ns
->element
= element
;
585 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
588 ns
->element
= element
;
592 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
593 strval
*qname
, const struct reader_position
*position
)
595 struct element
*element
;
598 element
= reader_alloc_zero(reader
, sizeof(*element
));
600 return E_OUTOFMEMORY
;
602 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
603 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
604 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
606 list_add_head(&reader
->elements
, &element
->entry
);
607 reader_mark_ns_nodes(reader
, element
);
608 reader
->is_empty_element
= FALSE
;
609 element
->position
= *position
;
612 reader_free_element(reader
, element
);
617 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
621 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
622 if (ns
->element
!= element
)
625 list_remove(&ns
->entry
);
626 reader_free_strvalued(reader
, &ns
->prefix
);
627 reader_free_strvalued(reader
, &ns
->uri
);
628 reader_free(reader
, ns
);
631 if (!list_empty(&reader
->nsdef
)) {
632 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
633 if (ns
->element
== element
) {
634 list_remove(&ns
->entry
);
635 reader_free_strvalued(reader
, &ns
->prefix
);
636 reader_free_strvalued(reader
, &ns
->uri
);
637 reader_free(reader
, ns
);
642 static void reader_pop_element(xmlreader
*reader
)
644 struct element
*element
;
646 if (list_empty(&reader
->elements
))
649 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
650 list_remove(&element
->entry
);
652 reader_pop_ns_nodes(reader
, element
);
653 reader_free_element(reader
, element
);
655 /* It was a root element, the rest is expected as Misc */
656 if (list_empty(&reader
->elements
))
657 reader
->instate
= XmlReadInState_MiscEnd
;
660 /* Always make a copy, because strings are supposed to be null terminated. Null pointer for 'value'
661 means node value is to be determined. */
662 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
664 strval
*v
= &reader
->strvalues
[type
];
666 reader_free_strvalue(reader
, type
);
675 if (value
->str
== strval_empty
.str
)
679 if (type
== StringValue_Value
)
681 /* defer allocation for value string */
683 v
->start
= value
->start
;
688 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
689 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
690 v
->str
[value
->len
] = 0;
696 static inline int is_reader_pending(xmlreader
*reader
)
698 return reader
->input
->pending
;
701 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
703 const int initial_len
= 0x2000;
704 buffer
->data
= readerinput_alloc(input
, initial_len
);
705 if (!buffer
->data
) return E_OUTOFMEMORY
;
707 memset(buffer
->data
, 0, 4);
709 buffer
->allocated
= initial_len
;
711 buffer
->prev_cr
= FALSE
;
716 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
718 readerinput_free(input
, buffer
->data
);
721 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
723 if (encoding
== XmlEncoding_Unknown
)
725 FIXME("unsupported encoding %d\n", encoding
);
729 *cp
= xml_encoding_map
[encoding
].cp
;
734 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
738 if (!name
) return XmlEncoding_Unknown
;
741 max
= ARRAY_SIZE(xml_encoding_map
) - 1;
748 c
= wcsnicmp(xml_encoding_map
[n
].name
, name
, len
);
750 c
= wcsicmp(xml_encoding_map
[n
].name
, name
);
752 return xml_encoding_map
[n
].enc
;
760 return XmlEncoding_Unknown
;
763 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
765 input_buffer
*buffer
;
768 input
->buffer
= NULL
;
770 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
771 if (!buffer
) return E_OUTOFMEMORY
;
773 buffer
->input
= input
;
774 buffer
->code_page
= ~0; /* code page is unknown at this point */
775 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
777 readerinput_free(input
, buffer
);
781 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
783 free_encoded_buffer(input
, &buffer
->utf16
);
784 readerinput_free(input
, buffer
);
788 input
->buffer
= buffer
;
792 static void free_input_buffer(input_buffer
*buffer
)
794 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
795 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
796 readerinput_free(buffer
->input
, buffer
);
799 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
801 if (readerinput
->stream
) {
802 ISequentialStream_Release(readerinput
->stream
);
803 readerinput
->stream
= NULL
;
807 /* Queries already stored interface for IStream/ISequentialStream.
808 Interface supplied on creation will be overwritten */
809 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
813 readerinput_release_stream(readerinput
);
814 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
816 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
821 /* reads a chunk to raw buffer */
822 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
824 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
825 /* to make sure aligned length won't exceed allocated length */
826 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
830 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
831 variable width encodings like UTF-8 */
832 len
= (len
+ 3) & ~3;
833 /* try to use allocated space or grow */
834 if (buffer
->allocated
- buffer
->written
< len
)
836 buffer
->allocated
*= 2;
837 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
838 len
= buffer
->allocated
- buffer
->written
;
842 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
843 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
844 readerinput
->pending
= hr
== E_PENDING
;
845 if (FAILED(hr
)) return hr
;
846 buffer
->written
+= read
;
847 if (!buffer
->written
)
848 return MX_E_INPUTEND
;
853 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
854 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
856 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
858 length
*= sizeof(WCHAR
);
859 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
860 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
862 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
863 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
864 buffer
->allocated
= grown_size
;
868 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
870 static const char startA
[] = {'<','?'};
871 static const char commentA
[] = {'<','!'};
872 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
873 unsigned char *ptr
= (unsigned char*)buffer
->data
;
875 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
876 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
877 /* test start byte */
880 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
881 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
882 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
883 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
887 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
889 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
890 static const char utf8bom
[] = {0xef,0xbb,0xbf};
891 static const char utf16lebom
[] = {0xff,0xfe};
894 *enc
= XmlEncoding_Unknown
;
896 if (buffer
->written
<= 3)
898 HRESULT hr
= readerinput_growraw(readerinput
);
899 if (FAILED(hr
)) return hr
;
900 if (buffer
->written
< 3) return MX_E_INPUTEND
;
903 ptrW
= (WCHAR
*)buffer
->data
;
904 /* try start symbols if we have enough data to do that, input buffer should contain
905 first chunk already */
906 if (readerinput_is_utf8(readerinput
))
907 *enc
= XmlEncoding_UTF8
;
908 else if (*ptrW
== '<')
911 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
912 *enc
= XmlEncoding_UTF16
;
914 /* try with BOM now */
915 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
917 buffer
->cur
+= sizeof(utf8bom
);
918 *enc
= XmlEncoding_UTF8
;
920 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
922 buffer
->cur
+= sizeof(utf16lebom
);
923 *enc
= XmlEncoding_UTF16
;
929 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
931 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
932 int len
= buffer
->written
;
936 /* complete single byte char */
937 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
939 /* find start byte of multibyte char */
940 while (--len
&& !(buffer
->data
[len
] & 0xc0))
946 /* Returns byte length of complete char sequence for buffer code page,
947 it's relative to current buffer position which is currently used for BOM handling
949 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
951 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
954 if (readerinput
->buffer
->code_page
== CP_UTF8
)
955 len
= readerinput_get_utf8_convlen(readerinput
);
957 len
= buffer
->written
;
959 TRACE("%d\n", len
- buffer
->cur
);
960 return len
- buffer
->cur
;
963 /* It's possible that raw buffer has some leftovers from last conversion - some char
964 sequence that doesn't represent a full code point. Length argument should be calculated with
965 readerinput_get_convlen(), if it's -1 it will be calculated here. */
966 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
968 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
971 len
= readerinput_get_convlen(readerinput
);
974 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
975 /* everything below cur is lost too */
976 buffer
->written
-= len
+ buffer
->cur
;
977 /* after this point we don't need cur offset really,
978 it's used only to mark where actual data begins when first chunk is read */
982 static void fixup_buffer_cr(encoded_buffer
*buffer
, int off
)
984 BOOL prev_cr
= buffer
->prev_cr
;
988 src
= dest
= (WCHAR
*)buffer
->data
+ off
;
989 while ((const char*)src
< buffer
->data
+ buffer
->written
)
998 if(prev_cr
&& *src
== '\n')
1005 buffer
->written
= (char*)dest
- buffer
->data
;
1006 buffer
->prev_cr
= prev_cr
;
1010 /* note that raw buffer content is kept */
1011 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
1013 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1014 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1020 hr
= get_code_page(enc
, &cp
);
1021 if (FAILED(hr
)) return;
1023 readerinput
->buffer
->code_page
= cp
;
1024 len
= readerinput_get_convlen(readerinput
);
1026 TRACE("switching to cp %d\n", cp
);
1028 /* just copy in this case */
1029 if (enc
== XmlEncoding_UTF16
)
1031 readerinput_grow(readerinput
, len
);
1032 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
1033 dest
->written
+= len
*sizeof(WCHAR
);
1037 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1038 readerinput_grow(readerinput
, dest_len
);
1039 ptr
= (WCHAR
*)dest
->data
;
1040 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1042 dest
->written
+= dest_len
*sizeof(WCHAR
);
1045 fixup_buffer_cr(dest
, 0);
1048 /* shrinks parsed data a buffer begins with */
1049 static void reader_shrink(xmlreader
*reader
)
1051 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1053 /* avoid moving data too often by only shrinking past a threshold length */
1054 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
1056 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
1057 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1059 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1063 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1064 It won't attempt to shrink but will grow destination buffer if needed */
1065 static HRESULT
reader_more(xmlreader
*reader
)
1067 xmlreaderinput
*readerinput
= reader
->input
;
1068 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1069 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1070 UINT cp
= readerinput
->buffer
->code_page
;
1071 int len
, dest_len
, prev_len
;
1075 /* get some raw data from stream first */
1076 if (FAILED(hr
= readerinput_growraw(readerinput
)))
1079 len
= readerinput_get_convlen(readerinput
);
1080 prev_len
= dest
->written
/ sizeof(WCHAR
);
1082 /* just copy for UTF-16 case */
1085 readerinput_grow(readerinput
, len
);
1086 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1087 dest
->written
+= len
*sizeof(WCHAR
);
1091 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1092 readerinput_grow(readerinput
, dest_len
);
1093 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1094 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1096 dest
->written
+= dest_len
*sizeof(WCHAR
);
1097 /* get rid of processed data */
1098 readerinput_shrinkraw(readerinput
, len
);
1101 fixup_buffer_cr(dest
, prev_len
);
1105 static inline UINT
reader_get_cur(xmlreader
*reader
)
1107 return reader
->input
->buffer
->utf16
.cur
;
1110 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1112 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1113 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1114 if (!*ptr
) reader_more(reader
);
1115 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1118 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1121 const WCHAR
*ptr
= reader_get_ptr(reader
);
1126 reader_more(reader
);
1127 ptr
= reader_get_ptr(reader
);
1129 if (str
[i
] != ptr
[i
])
1130 return ptr
[i
] - str
[i
];
1136 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1139 reader
->position
.line_position
= 1;
1140 else if (ch
== '\n')
1142 reader
->position
.line_number
++;
1143 reader
->position
.line_position
= 1;
1146 reader
->position
.line_position
++;
1149 /* moves cursor n WCHARs forward */
1150 static void reader_skipn(xmlreader
*reader
, int n
)
1152 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1155 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1157 reader_update_position(reader
, *ptr
);
1162 static inline BOOL
is_wchar_space(WCHAR ch
)
1164 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1167 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1168 static int reader_skipspaces(xmlreader
*reader
)
1170 const WCHAR
*ptr
= reader_get_ptr(reader
);
1171 UINT start
= reader_get_cur(reader
);
1173 while (is_wchar_space(*ptr
))
1175 reader_skipn(reader
, 1);
1176 ptr
= reader_get_ptr(reader
);
1179 return reader_get_cur(reader
) - start
;
1182 /* [26] VersionNum ::= '1.' [0-9]+ */
1183 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1188 if (reader_cmp(reader
, L
"1.")) return WC_E_XMLDECL
;
1190 start
= reader_get_cur(reader
);
1192 reader_skipn(reader
, 2);
1194 ptr2
= ptr
= reader_get_ptr(reader
);
1195 while (*ptr
>= '0' && *ptr
<= '9')
1197 reader_skipn(reader
, 1);
1198 ptr
= reader_get_ptr(reader
);
1201 if (ptr2
== ptr
) return WC_E_DIGIT
;
1202 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1203 TRACE("version=%s\n", debug_strval(reader
, val
));
1207 /* [25] Eq ::= S? '=' S? */
1208 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1210 reader_skipspaces(reader
);
1211 if (reader_cmp(reader
, L
"=")) return WC_E_EQUAL
;
1213 reader_skipn(reader
, 1);
1214 reader_skipspaces(reader
);
1218 static BOOL
reader_is_quote(xmlreader
*reader
)
1220 return !reader_cmp(reader
, L
"\'") || !reader_cmp(reader
, L
"\"");
1223 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1224 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1226 struct reader_position position
;
1230 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1232 position
= reader
->position
;
1233 if (reader_cmp(reader
, L
"version")) return WC_E_XMLDECL
;
1234 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1235 /* skip 'version' */
1236 reader_skipn(reader
, 7);
1238 hr
= reader_parse_eq(reader
);
1239 if (FAILED(hr
)) return hr
;
1241 if (!reader_is_quote(reader
))
1244 reader_skipn(reader
, 1);
1246 hr
= reader_parse_versionnum(reader
, &val
);
1247 if (FAILED(hr
)) return hr
;
1249 if (!reader_is_quote(reader
))
1253 reader_skipn(reader
, 1);
1255 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1258 /* ([A-Za-z0-9._] | '-') */
1259 static inline BOOL
is_wchar_encname(WCHAR ch
)
1261 return ((ch
>= 'A' && ch
<= 'Z') ||
1262 (ch
>= 'a' && ch
<= 'z') ||
1263 (ch
>= '0' && ch
<= '9') ||
1264 (ch
== '.') || (ch
== '_') ||
1268 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1269 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1271 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1275 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1276 return WC_E_ENCNAME
;
1278 val
->start
= reader_get_cur(reader
);
1281 while (is_wchar_encname(*++ptr
))
1285 enc
= parse_encoding_name(start
, len
);
1286 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1290 if (enc
== XmlEncoding_Unknown
)
1291 return WC_E_ENCNAME
;
1293 /* skip encoding name */
1294 reader_skipn(reader
, len
);
1298 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1299 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1301 struct reader_position position
;
1305 if (!reader_skipspaces(reader
)) return S_FALSE
;
1307 position
= reader
->position
;
1308 if (reader_cmp(reader
, L
"encoding")) return S_FALSE
;
1309 name
.str
= reader_get_ptr(reader
);
1310 name
.start
= reader_get_cur(reader
);
1312 /* skip 'encoding' */
1313 reader_skipn(reader
, 8);
1315 hr
= reader_parse_eq(reader
);
1316 if (FAILED(hr
)) return hr
;
1318 if (!reader_is_quote(reader
))
1321 reader_skipn(reader
, 1);
1323 hr
= reader_parse_encname(reader
, &val
);
1324 if (FAILED(hr
)) return hr
;
1326 if (!reader_is_quote(reader
))
1330 reader_skipn(reader
, 1);
1332 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1335 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1336 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1338 struct reader_position position
;
1343 if (!reader_skipspaces(reader
)) return S_FALSE
;
1345 position
= reader
->position
;
1346 if (reader_cmp(reader
, L
"standalone")) return S_FALSE
;
1347 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1348 /* skip 'standalone' */
1349 reader_skipn(reader
, 10);
1351 hr
= reader_parse_eq(reader
);
1352 if (FAILED(hr
)) return hr
;
1354 if (!reader_is_quote(reader
))
1357 reader_skipn(reader
, 1);
1359 if (reader_cmp(reader
, L
"yes") && reader_cmp(reader
, L
"no"))
1360 return WC_E_XMLDECL
;
1362 start
= reader_get_cur(reader
);
1363 /* skip 'yes'|'no' */
1364 reader_skipn(reader
, reader_cmp(reader
, L
"yes") ? 2 : 3);
1365 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1366 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1368 if (!reader_is_quote(reader
))
1371 reader_skipn(reader
, 1);
1373 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1376 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1377 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1379 struct reader_position position
;
1382 if (reader_cmp(reader
, L
"<?xml "))
1385 reader_skipn(reader
, 2);
1386 position
= reader
->position
;
1387 reader_skipn(reader
, 3);
1388 hr
= reader_parse_versioninfo(reader
);
1392 hr
= reader_parse_encdecl(reader
);
1396 hr
= reader_parse_sddecl(reader
);
1400 reader_skipspaces(reader
);
1401 if (reader_cmp(reader
, L
"?>"))
1402 return WC_E_XMLDECL
;
1405 reader_skipn(reader
, 2);
1407 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1408 reader
->empty_element
.position
= position
;
1409 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1410 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1415 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1416 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1421 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1423 start
= reader
->resume
[XmlReadResume_Body
];
1424 ptr
= reader_get_ptr(reader
);
1429 reader_skipn(reader
, 4);
1430 reader_shrink(reader
);
1431 ptr
= reader_get_ptr(reader
);
1432 start
= reader_get_cur(reader
);
1433 reader
->nodetype
= XmlNodeType_Comment
;
1434 reader
->resume
[XmlReadResume_Body
] = start
;
1435 reader
->resumestate
= XmlReadResumeState_Comment
;
1436 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1439 /* will exit when there's no more data, it won't attempt to
1440 read more from stream */
1451 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1452 TRACE("%s\n", debug_strval(reader
, &value
));
1454 /* skip the closing markup '-->' */
1455 reader_skipn(reader
, 3);
1457 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1458 reader
->resume
[XmlReadResume_Body
] = 0;
1459 reader
->resumestate
= XmlReadResumeState_Initial
;
1463 return WC_E_COMMENT
;
1467 reader_skipn(reader
, 1);
1474 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1475 static inline BOOL
is_char(WCHAR ch
)
1477 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1478 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1479 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1480 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1481 (ch
>= 0xe000 && ch
<= 0xfffd);
1484 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1485 BOOL
is_pubchar(WCHAR ch
)
1487 return (ch
== ' ') ||
1488 (ch
>= 'a' && ch
<= 'z') ||
1489 (ch
>= 'A' && ch
<= 'Z') ||
1490 (ch
>= '0' && ch
<= '9') ||
1491 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1492 (ch
== '=') || (ch
== '?') ||
1493 (ch
== '@') || (ch
== '!') ||
1494 (ch
>= '#' && ch
<= '%') || /* #$% */
1495 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1498 BOOL
is_namestartchar(WCHAR ch
)
1500 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1501 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1502 (ch
>= 0xc0 && ch
<= 0xd6) ||
1503 (ch
>= 0xd8 && ch
<= 0xf6) ||
1504 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1505 (ch
>= 0x370 && ch
<= 0x37d) ||
1506 (ch
>= 0x37f && ch
<= 0x1fff) ||
1507 (ch
>= 0x200c && ch
<= 0x200d) ||
1508 (ch
>= 0x2070 && ch
<= 0x218f) ||
1509 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1510 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1511 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1512 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1513 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1514 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1517 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1518 BOOL
is_ncnamechar(WCHAR ch
)
1520 return (ch
>= 'A' && ch
<= 'Z') ||
1521 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1522 (ch
== '-') || (ch
== '.') ||
1523 (ch
>= '0' && ch
<= '9') ||
1525 (ch
>= 0xc0 && ch
<= 0xd6) ||
1526 (ch
>= 0xd8 && ch
<= 0xf6) ||
1527 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1528 (ch
>= 0x300 && ch
<= 0x36f) ||
1529 (ch
>= 0x370 && ch
<= 0x37d) ||
1530 (ch
>= 0x37f && ch
<= 0x1fff) ||
1531 (ch
>= 0x200c && ch
<= 0x200d) ||
1532 (ch
>= 0x203f && ch
<= 0x2040) ||
1533 (ch
>= 0x2070 && ch
<= 0x218f) ||
1534 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1535 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1536 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1537 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1538 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1539 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1542 BOOL
is_namechar(WCHAR ch
)
1544 return (ch
== ':') || is_ncnamechar(ch
);
1547 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1549 /* When we're on attribute always return attribute type, container node type is kept.
1550 Note that container is not necessarily an element, and attribute doesn't mean it's
1551 an attribute in XML spec terms. */
1552 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1555 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1556 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1557 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1558 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1559 [5] Name ::= NameStartChar (NameChar)* */
1560 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1565 if (reader
->resume
[XmlReadResume_Name
])
1567 start
= reader
->resume
[XmlReadResume_Name
];
1568 ptr
= reader_get_ptr(reader
);
1572 ptr
= reader_get_ptr(reader
);
1573 start
= reader_get_cur(reader
);
1574 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1577 while (is_namechar(*ptr
))
1579 reader_skipn(reader
, 1);
1580 ptr
= reader_get_ptr(reader
);
1583 if (is_reader_pending(reader
))
1585 reader
->resume
[XmlReadResume_Name
] = start
;
1589 reader
->resume
[XmlReadResume_Name
] = 0;
1591 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1592 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1597 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1598 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1600 static const strval xmlval
= { (WCHAR
*)L
"xml", 3 };
1606 hr
= reader_parse_name(reader
, &name
);
1607 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1609 /* now that we got name check for illegal content */
1610 if (strval_eq(reader
, &name
, &xmlval
))
1611 return WC_E_LEADINGXML
;
1613 /* PITarget can't be a qualified name */
1614 ptr
= reader_get_strptr(reader
, &name
);
1615 for (i
= 0; i
< name
.len
; i
++)
1617 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1619 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1624 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1625 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1632 switch (reader
->resumestate
)
1634 case XmlReadResumeState_Initial
:
1636 reader_skipn(reader
, 2);
1637 reader_shrink(reader
);
1638 reader
->resumestate
= XmlReadResumeState_PITarget
;
1639 case XmlReadResumeState_PITarget
:
1640 hr
= reader_parse_pitarget(reader
, &target
);
1641 if (FAILED(hr
)) return hr
;
1642 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1643 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1644 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1645 reader
->resumestate
= XmlReadResumeState_PIBody
;
1646 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1651 start
= reader
->resume
[XmlReadResume_Body
];
1652 ptr
= reader_get_ptr(reader
);
1659 UINT cur
= reader_get_cur(reader
);
1662 /* strip all leading whitespace chars */
1665 ptr
= reader_get_ptr2(reader
, start
);
1666 if (!is_wchar_space(*ptr
)) break;
1670 reader_init_strvalue(start
, cur
-start
, &value
);
1673 reader_skipn(reader
, 2);
1674 TRACE("%s\n", debug_strval(reader
, &value
));
1675 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1676 reader
->resumestate
= XmlReadResumeState_Initial
;
1677 reader
->resume
[XmlReadResume_Body
] = 0;
1678 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1683 reader_skipn(reader
, 1);
1684 ptr
= reader_get_ptr(reader
);
1690 /* This one is used to parse significant whitespace nodes, like in Misc production */
1691 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1693 switch (reader
->resumestate
)
1695 case XmlReadResumeState_Initial
:
1696 reader_shrink(reader
);
1697 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1698 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1699 reader
->nodetype
= XmlNodeType_Whitespace
;
1700 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1701 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1702 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1704 case XmlReadResumeState_Whitespace
:
1709 reader_skipspaces(reader
);
1710 if (is_reader_pending(reader
)) return S_OK
;
1712 start
= reader
->resume
[XmlReadResume_Body
];
1713 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1714 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1715 TRACE("%s\n", debug_strval(reader
, &value
));
1716 reader
->resumestate
= XmlReadResumeState_Initial
;
1725 /* [27] Misc ::= Comment | PI | S */
1726 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1728 HRESULT hr
= S_FALSE
;
1730 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1732 hr
= reader_more(reader
);
1733 if (FAILED(hr
)) return hr
;
1735 /* finish current node */
1736 switch (reader
->resumestate
)
1738 case XmlReadResumeState_PITarget
:
1739 case XmlReadResumeState_PIBody
:
1740 return reader_parse_pi(reader
);
1741 case XmlReadResumeState_Comment
:
1742 return reader_parse_comment(reader
);
1743 case XmlReadResumeState_Whitespace
:
1744 return reader_parse_whitespace(reader
);
1746 ERR("unknown resume state %d\n", reader
->resumestate
);
1752 const WCHAR
*cur
= reader_get_ptr(reader
);
1754 if (is_wchar_space(*cur
))
1755 hr
= reader_parse_whitespace(reader
);
1756 else if (!reader_cmp(reader
, L
"<!--"))
1757 hr
= reader_parse_comment(reader
);
1758 else if (!reader_cmp(reader
, L
"<?"))
1759 hr
= reader_parse_pi(reader
);
1763 if (hr
!= S_FALSE
) return hr
;
1769 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1770 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1772 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1775 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1778 reader_skipn(reader
, 1);
1780 cur
= reader_get_ptr(reader
);
1781 start
= reader_get_cur(reader
);
1782 while (is_char(*cur
) && *cur
!= quote
)
1784 reader_skipn(reader
, 1);
1785 cur
= reader_get_ptr(reader
);
1787 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1788 if (*cur
== quote
) reader_skipn(reader
, 1);
1790 TRACE("%s\n", debug_strval(reader
, literal
));
1794 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1795 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1796 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1798 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1801 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1804 reader_skipn(reader
, 1);
1806 start
= reader_get_cur(reader
);
1807 cur
= reader_get_ptr(reader
);
1808 while (is_pubchar(*cur
) && *cur
!= quote
)
1810 reader_skipn(reader
, 1);
1811 cur
= reader_get_ptr(reader
);
1813 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1814 if (*cur
== quote
) reader_skipn(reader
, 1);
1816 TRACE("%s\n", debug_strval(reader
, literal
));
1820 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1821 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1823 static WCHAR systemW
[] = L
"SYSTEM";
1824 static WCHAR publicW
[] = L
"PUBLIC";
1825 struct reader_position position
= reader
->position
;
1830 if (!reader_cmp(reader
, publicW
)) {
1834 reader_skipn(reader
, 6);
1835 cnt
= reader_skipspaces(reader
);
1836 if (!cnt
) return WC_E_WHITESPACE
;
1838 hr
= reader_parse_pub_literal(reader
, &pub
);
1839 if (FAILED(hr
)) return hr
;
1841 reader_init_cstrvalue(publicW
, lstrlenW(publicW
), &name
);
1842 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1843 if (FAILED(hr
)) return hr
;
1845 cnt
= reader_skipspaces(reader
);
1846 if (!cnt
) return S_OK
;
1848 /* optional system id */
1849 hr
= reader_parse_sys_literal(reader
, &sys
);
1850 if (FAILED(hr
)) return S_OK
;
1852 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1853 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1854 if (FAILED(hr
)) return hr
;
1857 } else if (!reader_cmp(reader
, systemW
)) {
1859 reader_skipn(reader
, 6);
1860 cnt
= reader_skipspaces(reader
);
1861 if (!cnt
) return WC_E_WHITESPACE
;
1863 hr
= reader_parse_sys_literal(reader
, &sys
);
1864 if (FAILED(hr
)) return hr
;
1866 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1867 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1873 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1874 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1880 if (reader_cmp(reader
, L
"<!DOCTYPE")) return S_FALSE
;
1881 reader_shrink(reader
);
1883 /* DTD processing is not allowed by default */
1884 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1886 reader_skipn(reader
, 9);
1887 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1890 hr
= reader_parse_name(reader
, &name
);
1891 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1893 reader_skipspaces(reader
);
1895 hr
= reader_parse_externalid(reader
);
1896 if (FAILED(hr
)) return hr
;
1898 reader_skipspaces(reader
);
1900 cur
= reader_get_ptr(reader
);
1903 FIXME("internal subset parsing not implemented\n");
1908 reader_skipn(reader
, 1);
1910 reader
->nodetype
= XmlNodeType_DocumentType
;
1911 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1912 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1917 /* [11 NS] LocalPart ::= NCName */
1918 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1923 if (reader
->resume
[XmlReadResume_Local
])
1925 start
= reader
->resume
[XmlReadResume_Local
];
1926 ptr
= reader_get_ptr(reader
);
1930 ptr
= reader_get_ptr(reader
);
1931 start
= reader_get_cur(reader
);
1934 while (is_ncnamechar(*ptr
))
1936 reader_skipn(reader
, 1);
1937 ptr
= reader_get_ptr(reader
);
1940 if (check_for_separator
&& *ptr
== ':')
1941 return NC_E_QNAMECOLON
;
1943 if (is_reader_pending(reader
))
1945 reader
->resume
[XmlReadResume_Local
] = start
;
1949 reader
->resume
[XmlReadResume_Local
] = 0;
1951 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1956 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1957 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1958 [9 NS] UnprefixedName ::= LocalPart
1959 [10 NS] Prefix ::= NCName */
1960 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1966 if (reader
->resume
[XmlReadResume_Name
])
1968 start
= reader
->resume
[XmlReadResume_Name
];
1969 ptr
= reader_get_ptr(reader
);
1973 ptr
= reader_get_ptr(reader
);
1974 start
= reader_get_cur(reader
);
1975 reader
->resume
[XmlReadResume_Name
] = start
;
1976 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1979 if (reader
->resume
[XmlReadResume_Local
])
1981 hr
= reader_parse_local(reader
, local
, FALSE
);
1982 if (FAILED(hr
)) return hr
;
1984 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1985 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1990 /* skip prefix part */
1991 while (is_ncnamechar(*ptr
))
1993 reader_skipn(reader
, 1);
1994 ptr
= reader_get_ptr(reader
);
1997 if (is_reader_pending(reader
)) return E_PENDING
;
1999 /* got a qualified name */
2002 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
2005 reader_skipn(reader
, 1);
2006 hr
= reader_parse_local(reader
, local
, TRUE
);
2007 if (FAILED(hr
)) return hr
;
2011 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
2012 reader_init_strvalue(0, 0, prefix
);
2017 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
2019 TRACE("ncname %s\n", debug_strval(reader
, local
));
2021 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
2023 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
2026 reader
->resume
[XmlReadResume_Name
] = 0;
2027 reader
->resume
[XmlReadResume_Local
] = 0;
2032 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2034 static const strval lt
= { (WCHAR
*)L
"lt", 2 };
2035 static const strval gt
= { (WCHAR
*)L
"gt", 2 };
2036 static const strval amp
= { (WCHAR
*)L
"amp", 3 };
2037 static const strval apos
= { (WCHAR
*)L
"apos", 4 };
2038 static const strval quot
= { (WCHAR
*)L
"quot", 4 };
2039 WCHAR
*str
= reader_get_strptr(reader
, name
);
2044 if (strval_eq(reader
, name
, <
)) return '<';
2047 if (strval_eq(reader
, name
, >
)) return '>';
2050 if (strval_eq(reader
, name
, &
))
2052 else if (strval_eq(reader
, name
, &apos
))
2056 if (strval_eq(reader
, name
, "
)) return '\"';
2065 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2066 [67] Reference ::= EntityRef | CharRef
2067 [68] EntityRef ::= '&' Name ';' */
2068 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2070 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2071 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2072 UINT cur
= reader_get_cur(reader
);
2077 reader_skipn(reader
, 1);
2078 ptr
= reader_get_ptr(reader
);
2082 reader_skipn(reader
, 1);
2083 ptr
= reader_get_ptr(reader
);
2085 /* hex char or decimal */
2088 reader_skipn(reader
, 1);
2089 ptr
= reader_get_ptr(reader
);
2093 if ((*ptr
>= '0' && *ptr
<= '9'))
2094 ch
= ch
*16 + *ptr
- '0';
2095 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2096 ch
= ch
*16 + *ptr
- 'a' + 10;
2097 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2098 ch
= ch
*16 + *ptr
- 'A' + 10;
2100 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2101 reader_skipn(reader
, 1);
2102 ptr
= reader_get_ptr(reader
);
2109 if ((*ptr
>= '0' && *ptr
<= '9'))
2111 ch
= ch
*10 + *ptr
- '0';
2112 reader_skipn(reader
, 1);
2113 ptr
= reader_get_ptr(reader
);
2116 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2120 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2123 if (is_wchar_space(ch
)) ch
= ' ';
2125 ptr
= reader_get_ptr(reader
);
2126 start
= reader_get_ptr2(reader
, cur
);
2127 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2128 memmove(start
+ 1, ptr
+ 1, len
);
2130 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2131 buffer
->cur
= cur
+ 1;
2140 hr
= reader_parse_name(reader
, &name
);
2141 if (FAILED(hr
)) return hr
;
2143 ptr
= reader_get_ptr(reader
);
2144 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2146 /* predefined entities resolve to a single character */
2147 ch
= get_predefined_entity(reader
, &name
);
2150 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2151 memmove(start
+1, ptr
+1, len
);
2152 buffer
->cur
= cur
+ 1;
2153 buffer
->written
-= (ptr
- start
) * sizeof(WCHAR
);
2159 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2160 return WC_E_UNDECLAREDENTITY
;
2168 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2169 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2174 ptr
= reader_get_ptr(reader
);
2176 /* skip opening quote */
2178 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2179 reader_skipn(reader
, 1);
2181 ptr
= reader_get_ptr(reader
);
2182 start
= reader_get_cur(reader
);
2185 if (*ptr
== '<') return WC_E_LESSTHAN
;
2189 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2190 /* skip closing quote */
2191 reader_skipn(reader
, 1);
2197 HRESULT hr
= reader_parse_reference(reader
);
2198 if (FAILED(hr
)) return hr
;
2202 /* replace all whitespace chars with ' ' */
2203 if (is_wchar_space(*ptr
)) *ptr
= ' ';
2204 reader_skipn(reader
, 1);
2206 ptr
= reader_get_ptr(reader
);
2212 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2213 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2214 [3 NS] DefaultAttName ::= 'xmlns'
2215 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2216 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2218 struct reader_position position
= reader
->position
;
2219 strval prefix
, local
, qname
, value
;
2220 enum attribute_flags flags
= 0;
2223 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2224 if (FAILED(hr
)) return hr
;
2226 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2227 flags
|= ATTRIBUTE_NS_DEFINITION
;
2229 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2230 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2232 hr
= reader_parse_eq(reader
);
2233 if (FAILED(hr
)) return hr
;
2235 hr
= reader_parse_attvalue(reader
, &value
);
2236 if (FAILED(hr
)) return hr
;
2238 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2239 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2241 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2242 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2245 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2246 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2247 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2249 struct reader_position position
= reader
->position
;
2252 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2253 if (FAILED(hr
)) return hr
;
2257 reader_skipspaces(reader
);
2260 if ((reader
->is_empty_element
= !reader_cmp(reader
, L
"/>")))
2262 struct element
*element
= &reader
->empty_element
;
2265 reader_skipn(reader
, 2);
2267 reader_free_strvalued(reader
, &element
->qname
);
2268 reader_free_strvalued(reader
, &element
->localname
);
2270 element
->prefix
= *prefix
;
2271 reader_strvaldup(reader
, qname
, &element
->qname
);
2272 reader_strvaldup(reader
, local
, &element
->localname
);
2273 element
->position
= position
;
2274 reader_mark_ns_nodes(reader
, element
);
2278 /* got a start tag */
2279 if (!reader_cmp(reader
, L
">"))
2282 reader_skipn(reader
, 1);
2283 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2286 hr
= reader_parse_attribute(reader
);
2287 if (FAILED(hr
)) return hr
;
2293 /* [39] element ::= EmptyElemTag | STag content ETag */
2294 static HRESULT
reader_parse_element(xmlreader
*reader
)
2298 switch (reader
->resumestate
)
2300 case XmlReadResumeState_Initial
:
2301 /* check if we are really on element */
2302 if (reader_cmp(reader
, L
"<")) return S_FALSE
;
2305 reader_skipn(reader
, 1);
2307 reader_shrink(reader
);
2308 reader
->resumestate
= XmlReadResumeState_STag
;
2309 case XmlReadResumeState_STag
:
2311 strval qname
, prefix
, local
;
2313 /* this handles empty elements too */
2314 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2315 if (FAILED(hr
)) return hr
;
2317 /* FIXME: need to check for defined namespace to reject invalid prefix */
2319 /* if we got empty element and stack is empty go straight to Misc */
2320 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2321 reader
->instate
= XmlReadInState_MiscEnd
;
2323 reader
->instate
= XmlReadInState_Content
;
2325 reader
->nodetype
= XmlNodeType_Element
;
2326 reader
->resumestate
= XmlReadResumeState_Initial
;
2327 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2328 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2329 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2339 /* [13 NS] ETag ::= '</' QName S? '>' */
2340 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2342 struct reader_position position
;
2343 strval prefix
, local
, qname
;
2344 struct element
*element
;
2348 reader_skipn(reader
, 2);
2350 position
= reader
->position
;
2351 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2352 if (FAILED(hr
)) return hr
;
2354 reader_skipspaces(reader
);
2356 if (reader_cmp(reader
, L
">")) return WC_E_GREATERTHAN
;
2359 reader_skipn(reader
, 1);
2361 /* Element stack should never be empty at this point, cause we shouldn't get to
2362 content parsing if it's empty. */
2363 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2364 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2366 /* update position stored for start tag, we won't be using it */
2367 element
->position
= position
;
2369 reader
->nodetype
= XmlNodeType_EndElement
;
2370 reader
->is_empty_element
= FALSE
;
2371 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2376 /* [18] CDSect ::= CDStart CData CDEnd
2377 [19] CDStart ::= '<![CDATA['
2378 [20] CData ::= (Char* - (Char* ']]>' Char*))
2379 [21] CDEnd ::= ']]>' */
2380 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2385 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2387 start
= reader
->resume
[XmlReadResume_Body
];
2388 ptr
= reader_get_ptr(reader
);
2392 /* skip markup '<![CDATA[' */
2393 reader_skipn(reader
, 9);
2394 reader_shrink(reader
);
2395 ptr
= reader_get_ptr(reader
);
2396 start
= reader_get_cur(reader
);
2397 reader
->nodetype
= XmlNodeType_CDATA
;
2398 reader
->resume
[XmlReadResume_Body
] = start
;
2399 reader
->resumestate
= XmlReadResumeState_CDATA
;
2400 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2405 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2409 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2412 reader_skipn(reader
, 3);
2413 TRACE("%s\n", debug_strval(reader
, &value
));
2415 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2416 reader
->resume
[XmlReadResume_Body
] = 0;
2417 reader
->resumestate
= XmlReadResumeState_Initial
;
2422 reader_skipn(reader
, 1);
2423 ptr
= reader_get_ptr(reader
);
2430 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2431 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2433 struct reader_position position
;
2437 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2439 start
= reader
->resume
[XmlReadResume_Body
];
2440 ptr
= reader_get_ptr(reader
);
2444 reader_shrink(reader
);
2445 ptr
= reader_get_ptr(reader
);
2446 start
= reader_get_cur(reader
);
2447 /* There's no text */
2448 if (!*ptr
|| *ptr
== '<') return S_OK
;
2449 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2450 reader
->resume
[XmlReadResume_Body
] = start
;
2451 reader
->resumestate
= XmlReadResumeState_CharData
;
2452 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2455 position
= reader
->position
;
2458 /* CDATA closing sequence ']]>' is not allowed */
2459 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2460 return WC_E_CDSECTEND
;
2462 /* Found next markup part */
2467 reader
->empty_element
.position
= position
;
2468 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2469 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2470 reader
->resume
[XmlReadResume_Body
] = 0;
2471 reader
->resumestate
= XmlReadResumeState_Initial
;
2475 /* this covers a case when text has leading whitespace chars */
2476 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2478 if (!reader_cmp(reader
, L
"&"))
2479 reader_parse_reference(reader
);
2481 reader_skipn(reader
, 1);
2483 ptr
= reader_get_ptr(reader
);
2489 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2490 static HRESULT
reader_parse_content(xmlreader
*reader
)
2492 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2494 switch (reader
->resumestate
)
2496 case XmlReadResumeState_CDATA
:
2497 return reader_parse_cdata(reader
);
2498 case XmlReadResumeState_Comment
:
2499 return reader_parse_comment(reader
);
2500 case XmlReadResumeState_PIBody
:
2501 case XmlReadResumeState_PITarget
:
2502 return reader_parse_pi(reader
);
2503 case XmlReadResumeState_CharData
:
2504 return reader_parse_chardata(reader
);
2506 ERR("unknown resume state %d\n", reader
->resumestate
);
2510 reader_shrink(reader
);
2512 /* handle end tag here, it indicates end of content as well */
2513 if (!reader_cmp(reader
, L
"</"))
2514 return reader_parse_endtag(reader
);
2516 if (!reader_cmp(reader
, L
"<!--"))
2517 return reader_parse_comment(reader
);
2519 if (!reader_cmp(reader
, L
"<?"))
2520 return reader_parse_pi(reader
);
2522 if (!reader_cmp(reader
, L
"<![CDATA["))
2523 return reader_parse_cdata(reader
);
2525 if (!reader_cmp(reader
, L
"<"))
2526 return reader_parse_element(reader
);
2528 /* what's left must be CharData */
2529 return reader_parse_chardata(reader
);
2532 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2534 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2537 if (!is_reader_pending(reader
))
2539 reader
->chunk_read_off
= 0;
2540 reader_clear_attrs(reader
);
2543 /* When moving from EndElement or empty element, pop its own namespace definitions */
2546 case XmlNodeType_Attribute
:
2547 reader_dec_depth(reader
);
2549 case XmlNodeType_Element
:
2550 if (reader
->is_empty_element
)
2551 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2552 else if (FAILED(hr
= reader_inc_depth(reader
)))
2555 case XmlNodeType_EndElement
:
2556 reader_pop_element(reader
);
2557 reader_dec_depth(reader
);
2565 switch (reader
->instate
)
2567 /* if it's a first call for a new input we need to detect stream encoding */
2568 case XmlReadInState_Initial
:
2572 hr
= readerinput_growraw(reader
->input
);
2573 if (FAILED(hr
)) return hr
;
2575 reader
->position
.line_number
= 1;
2576 reader
->position
.line_position
= 1;
2578 /* try to detect encoding by BOM or data and set input code page */
2579 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2580 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2581 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2582 if (FAILED(hr
)) return hr
;
2584 /* always switch first time cause we have to put something in */
2585 readerinput_switchencoding(reader
->input
, enc
);
2587 /* parse xml declaration */
2588 hr
= reader_parse_xmldecl(reader
);
2589 if (FAILED(hr
)) return hr
;
2591 readerinput_shrinkraw(reader
->input
, -1);
2592 reader
->instate
= XmlReadInState_Misc_DTD
;
2593 if (hr
== S_OK
) return hr
;
2596 case XmlReadInState_Misc_DTD
:
2597 hr
= reader_parse_misc(reader
);
2598 if (FAILED(hr
)) return hr
;
2601 reader
->instate
= XmlReadInState_DTD
;
2605 case XmlReadInState_DTD
:
2606 hr
= reader_parse_dtd(reader
);
2607 if (FAILED(hr
)) return hr
;
2611 reader
->instate
= XmlReadInState_DTD_Misc
;
2615 reader
->instate
= XmlReadInState_Element
;
2617 case XmlReadInState_DTD_Misc
:
2618 hr
= reader_parse_misc(reader
);
2619 if (FAILED(hr
)) return hr
;
2622 reader
->instate
= XmlReadInState_Element
;
2626 case XmlReadInState_Element
:
2627 return reader_parse_element(reader
);
2628 case XmlReadInState_Content
:
2629 return reader_parse_content(reader
);
2630 case XmlReadInState_MiscEnd
:
2631 hr
= reader_parse_misc(reader
);
2632 if (hr
!= S_FALSE
) return hr
;
2634 if (*reader_get_ptr(reader
))
2636 WARN("found garbage in the end of XML\n");
2640 reader
->instate
= XmlReadInState_Eof
;
2641 reader
->state
= XmlReadState_EndOfFile
;
2642 reader
->nodetype
= XmlNodeType_None
;
2644 case XmlReadInState_Eof
:
2647 FIXME("internal state %d not handled\n", reader
->instate
);
/* IUnknown::QueryInterface for the reader object.
   The visible checks accept IID_IUnknown and IID_IXmlReader; any other riid is
   reported with a FIXME and answered with E_NOINTERFACE. IXmlReader_AddRef()
   is called on iface (presumably on the success path only; the lines between
   are not part of this listing). */
2655 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2657 xmlreader
*This
= impl_from_IXmlReader(iface
);
2659 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2661 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2662 IsEqualGUID(riid
, &IID_IXmlReader
))
2668 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2670 return E_NOINTERFACE
;
2673 IXmlReader_AddRef(iface
);
/* IUnknown::AddRef for the reader: atomically increments the object's
   reference counter with InterlockedIncrement() and traces the new value. */
2678 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2680 xmlreader
*This
= impl_from_IXmlReader(iface
);
2681 ULONG ref
= InterlockedIncrement(&This
->ref
);
2682 TRACE("(%p)->(%d)\n", This
, ref
);
2686 static void reader_clear_ns(xmlreader
*reader
)
2688 struct ns
*ns
, *ns2
;
2690 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2691 list_remove(&ns
->entry
);
2692 reader_free_strvalued(reader
, &ns
->prefix
);
2693 reader_free_strvalued(reader
, &ns
->uri
);
2694 reader_free(reader
, ns
);
2697 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2698 list_remove(&ns
->entry
);
2699 reader_free_strvalued(reader
, &ns
->uri
);
2700 reader_free(reader
, ns
);
/* Puts the parser-side state of the reader back to its starting values:
   zeroes the line/column position, calls the element, attribute and namespace
   clearing helpers, frees the cached string values, and resets node type,
   resume state, the resume offsets array and the empty-element flag.
   NOTE(review): this listing skips original lines 2713-2714, so a further
   reset may happen there. */
2704 static void reader_reset_parser(xmlreader
*reader
)
2706 reader
->position
.line_number
= 0;
2707 reader
->position
.line_position
= 0;
2709 reader_clear_elements(reader
);
2710 reader_clear_attrs(reader
);
2711 reader_clear_ns(reader
);
2712 reader_free_strvalues(reader
);
2715 reader
->nodetype
= XmlNodeType_None
;
2716 reader
->resumestate
= XmlReadResumeState_Initial
;
2717 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2718 reader
->is_empty_element
= FALSE
;
/* IUnknown::Release for the reader: atomically decrements the reference
   counter and traces the result. The teardown sequence below (presumably
   guarded by the counter reaching zero; the guard is not in this listing)
   resets the parser, releases the input, resolver and mlang objects when set,
   and frees the reader itself. */
2721 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2723 xmlreader
*This
= impl_from_IXmlReader(iface
);
2724 LONG ref
= InterlockedDecrement(&This
->ref
);
2726 TRACE("(%p)->(%d)\n", This
, ref
);
/* keep a local copy of the allocator: This is freed before it is released */
2730 IMalloc
*imalloc
= This
->imalloc
;
2731 reader_reset_parser(This
);
2732 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2733 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2734 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2735 reader_free(This
, This
);
2736 if (imalloc
) IMalloc_Release(imalloc
);
/* IXmlReader::SetInput: replaces the reader's current input.
   Visible steps: the stream of the current input is released together with
   the input object itself, the parser is reset, and the supplied object is
   queried for IID_IXmlReaderInput. Only the built-in implementation
   (recognised by comparing lpVtbl with xmlreaderinputvtbl) is adopted directly;
   a foreign one is reported with ERR and released. When no usable
   IXmlReaderInput was obtained, a new one is created around the supplied
   object with CreateXmlReaderInputWithEncodingName(). Finally the input is
   asked for its stream and the read state and internal state are set back to
   their Initial values.
   NOTE(review): several conditionals and returns are not part of this
   listing; the branch structure described here should be confirmed. */
2742 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2744 xmlreader
*This
= impl_from_IXmlReader(iface
);
2745 IXmlReaderInput
*readerinput
;
2748 TRACE("(%p)->(%p)\n", This
, input
);
2752 readerinput_release_stream(This
->input
);
2753 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2757 reader_reset_parser(This
);
2759 /* just reset current input */
2762 This
->state
= XmlReadState_Initial
;
2766 /* now try IXmlReaderInput, ISequentialStream, IStream */
2767 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2770 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2771 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2774 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2775 readerinput
, readerinput
->lpVtbl
);
2776 IUnknown_Release(readerinput
);
2782 if (hr
!= S_OK
|| !readerinput
)
2784 /* create an IXmlReaderInput based on the supplied interface */
2785 hr
= CreateXmlReaderInputWithEncodingName(input
,
2786 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2787 if (hr
!= S_OK
) return hr
;
2788 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2791 /* set stream for supplied IXmlReaderInput */
2792 hr
= readerinput_query_for_stream(This
->input
);
2795 This
->state
= XmlReadState_Initial
;
2796 This
->instate
= XmlReadInState_Initial
;
/* IXmlReader::GetProperty: copies the requested property into *value.
   Returns E_INVALIDARG when value is NULL. The MultiLanguage and XmlResolver
   properties hand out the stored interface pointer and take a reference on it
   (any NULL guard around the AddRef calls is not part of this listing);
   DtdProcessing, ReadState and MaxElementDepth return the stored scalar.
   Other properties are reported with a FIXME. */
2801 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2803 xmlreader
*This
= impl_from_IXmlReader(iface
);
2805 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2807 if (!value
) return E_INVALIDARG
;
2811 case XmlReaderProperty_MultiLanguage
:
2812 *value
= (LONG_PTR
)This
->mlang
;
2814 IUnknown_AddRef(This
->mlang
);
2816 case XmlReaderProperty_XmlResolver
:
2817 *value
= (LONG_PTR
)This
->resolver
;
2819 IXmlResolver_AddRef(This
->resolver
);
2821 case XmlReaderProperty_DtdProcessing
:
2822 *value
= This
->dtdmode
;
2824 case XmlReaderProperty_ReadState
:
2825 *value
= This
->state
;
2827 case XmlReaderProperty_MaxElementDepth
:
2828 *value
= This
->max_depth
;
2831 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::SetProperty: stores a new value for the given property.
   MultiLanguage and XmlResolver replace the stored interface pointer: the old
   one is released and the new one gets a reference (NULL guards, if any, are
   not part of this listing). MultiLanguage is stored but otherwise ignored,
   as the FIXME states. DtdProcessing rejects values outside
   0.._DtdProcessing_Last with E_INVALIDARG. MaxElementDepth is stored as
   given. Other properties are reported with a FIXME. */
2838 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2840 xmlreader
*This
= impl_from_IXmlReader(iface
);
2842 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2846 case XmlReaderProperty_MultiLanguage
:
2848 IUnknown_Release(This
->mlang
);
2849 This
->mlang
= (IUnknown
*)value
;
2851 IUnknown_AddRef(This
->mlang
);
2853 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2855 case XmlReaderProperty_XmlResolver
:
2857 IXmlResolver_Release(This
->resolver
);
2858 This
->resolver
= (IXmlResolver
*)value
;
2860 IXmlResolver_AddRef(This
->resolver
);
2862 case XmlReaderProperty_DtdProcessing
:
2863 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2864 This
->dtdmode
= value
;
2866 case XmlReaderProperty_MaxElementDepth
:
2867 This
->max_depth
= value
;
2870 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::Read: advances the reader to the next node.
   The node type held before the call is remembered in oldtype. The read state
   is switched on, with the Closed and Error states handled separately from
   the path that calls reader_parse_nextnode(). After a successful parse that
   moved away from XmlNodeType_None the read state becomes Interactive. The
   visible failure handling puts the reader into the Error state with node
   type None. The resulting node type is traced and written to *nodetype.
   NOTE(review): the conditions guarding the error path and the final store to
   *nodetype are not part of this listing. */
2877 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2879 xmlreader
*This
= impl_from_IXmlReader(iface
);
2880 XmlNodeType oldtype
= This
->nodetype
;
2884 TRACE("(%p)->(%p)\n", This
, nodetype
);
2889 switch (This
->state
)
2891 case XmlReadState_Closed
:
2894 case XmlReadState_Error
:
2898 hr
= reader_parse_nextnode(This
);
2899 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2900 This
->state
= XmlReadState_Interactive
;
2904 This
->state
= XmlReadState_Error
;
2905 This
->nodetype
= XmlNodeType_None
;
2911 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2912 *nodetype
= This
->nodetype
;
/* IXmlReader::GetNodeType: stores the current node type, as reported by
   reader_get_nodetype(), in *node_type. Has an E_INVALIDARG return (presumably
   for a NULL node_type; the check itself is not in this listing). Returns
   S_FALSE when the reader is in the Closed state and S_OK otherwise. */
2917 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2919 xmlreader
*This
= impl_from_IXmlReader(iface
);
2921 TRACE("(%p)->(%p)\n", This
, node_type
);
2924 return E_INVALIDARG
;
2926 *node_type
= reader_get_nodetype(This
);
2927 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2930 static void reader_set_current_attribute(xmlreader
*reader
, struct attribute
*attr
)
2932 reader
->attr
= attr
;
2933 reader
->chunk_read_off
= 0;
2934 reader_set_strvalue(reader
, StringValue_Prefix
, &attr
->prefix
);
2935 reader_set_strvalue(reader
, StringValue_QualifiedName
, &attr
->qname
);
2936 reader_set_strvalue(reader
, StringValue_Value
, &attr
->value
);
/* Positions the reader on the first entry of its attribute list.
   Checks for an empty attribute set first (the early-return value is not in
   this listing), bumps the depth with reader_inc_depth() (any condition on
   that call is likewise not shown), then makes the list head the current
   attribute. */
2939 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2941 if (!reader
->attr_count
)
2945 reader_inc_depth(reader
);
2947 reader_set_current_attribute(reader
, LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
));
2952 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2954 xmlreader
*This
= impl_from_IXmlReader(iface
);
2956 TRACE("(%p)\n", This
);
2958 return reader_move_to_first_attribute(This
);
/* IXmlReader::MoveToNextAttribute: advances to the attribute following the
   current one. Returns S_FALSE when the node has no attributes. One path
   delegates to reader_move_to_first_attribute() (presumably when no attribute
   is current yet; the condition is not in this listing). Otherwise the
   successor of the current attribute is looked up with list_next(); the
   result is S_OK when a successor exists and S_FALSE at the end of the list. */
2961 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2963 xmlreader
*This
= impl_from_IXmlReader(iface
);
2964 const struct list
*next
;
2966 TRACE("(%p)\n", This
);
2968 if (!This
->attr_count
) return S_FALSE
;
2971 return reader_move_to_first_attribute(This
);
2973 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2975 reader_set_current_attribute(This
, LIST_ENTRY(next
, struct attribute
, entry
));
2977 return next
? S_OK
: S_FALSE
;
/* Resolves the namespace URI of an attribute into *uri / *len.
   Reserved names are handled first: an unprefixed attribute whose local name
   is "xmlns", or any attribute with the "xmlns" prefix, gets the length of
   the XMLNS namespace URI; the "xml" prefix gets the length of the XML
   namespace URI. Any other prefix is looked up with reader_lookup_ns().
   The stores to *uri and the handling of a failed lookup are not part of
   this listing. */
2980 static void reader_get_attribute_ns_uri(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**uri
, UINT
*len
)
2982 static const WCHAR xmlns_uriW
[] = L
"http://www.w3.org/2000/xmlns/";
2983 static const WCHAR xml_uriW
[] = L
"http://www.w3.org/XML/1998/namespace";
2985 /* Check for reserved prefixes first */
2986 if ((strval_eq(reader
, &attr
->prefix
, &strval_empty
) && strval_eq(reader
, &attr
->localname
, &strval_xmlns
)) ||
2987 strval_eq(reader
, &attr
->prefix
, &strval_xmlns
))
/* length excludes the terminating NUL of the literal */
2990 *len
= ARRAY_SIZE(xmlns_uriW
) - 1;
2992 else if (strval_eq(reader
, &attr
->prefix
, &strval_xml
))
2995 *len
= ARRAY_SIZE(xml_uriW
) - 1;
3007 if ((ns
= reader_lookup_ns(reader
, &attr
->prefix
)))
/* Reports the local name of an attribute through *name / *len.
   Three cases are distinguished by attr->flags: a default namespace
   definition (its result is not part of this listing), a prefixed namespace
   definition, which reports the prefix string of the namespace entry found
   by looking up the attribute's local name, and a regular attribute, which
   reports its own localname string.
   NOTE(review): the ATTRIBUTE_NS_DEFINITION branch dereferences the result of
   reader_lookup_ns() with no NULL check visible here; confirm the lookup
   cannot fail for such attributes. */
3020 static void reader_get_attribute_local_name(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**name
, UINT
*len
)
3022 if (attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3027 else if (attr
->flags
& ATTRIBUTE_NS_DEFINITION
)
3029 const struct ns
*ns
= reader_lookup_ns(reader
, &attr
->localname
);
3030 *name
= ns
->prefix
.str
;
3031 *len
= ns
->prefix
.len
;
3035 *name
= attr
->localname
.str
;
3036 *len
= attr
->localname
.len
;
/* IXmlReader::MoveToAttributeByName: makes the attribute with the given
   local name and namespace URI the current one.
   Has an E_INVALIDARG return (presumably for a NULL local_name; the check is
   not in this listing) and a check for an empty attribute set. namespace_uri
   can be replaced by the empty string (presumably when NULL). Every attribute
   is then compared by local name and namespace URI, lengths first and
   contents second, and the first match becomes the current attribute.
   Return values of the match / no-match paths are not part of this listing. */
3040 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
3041 const WCHAR
*local_name
, const WCHAR
*namespace_uri
)
3043 xmlreader
*This
= impl_from_IXmlReader(iface
);
3044 UINT target_name_len
, target_uri_len
;
3045 struct attribute
*attr
;
3047 TRACE("(%p)->(%s %s)\n", This
, debugstr_w(local_name
), debugstr_w(namespace_uri
));
3050 return E_INVALIDARG
;
3052 if (!This
->attr_count
)
3056 namespace_uri
= emptyW
;
3058 target_name_len
= lstrlenW(local_name
);
3059 target_uri_len
= lstrlenW(namespace_uri
);
3061 LIST_FOR_EACH_ENTRY(attr
, &This
->attrs
, struct attribute
, entry
)
3063 UINT name_len
, uri_len
;
3064 const WCHAR
*name
, *uri
;
3066 reader_get_attribute_local_name(This
, attr
, &name
, &name_len
);
3067 reader_get_attribute_ns_uri(This
, attr
, &uri
, &uri_len
);
3069 if (name_len
== target_name_len
&& uri_len
== target_uri_len
&&
3070 !wcscmp(name
, local_name
) && !wcscmp(uri
, namespace_uri
))
3072 reader_set_current_attribute(This
, attr
);
/* IXmlReader::MoveToElement: moves from an attribute back to the element
   that owns it. Returns S_FALSE when the node has no attributes. Calls
   reader_dec_depth() (any condition on it is not in this listing) and then
   restores the prefix and qualified-name string slots, from
   This->empty_element when the is_empty_element flag is set and from the head
   of the element list otherwise. The chunked-read offset is rewound and the
   value slot is set to the empty string. */
3080 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
3082 xmlreader
*This
= impl_from_IXmlReader(iface
);
3084 TRACE("(%p)\n", This
);
3086 if (!This
->attr_count
) return S_FALSE
;
3089 reader_dec_depth(This
);
3093 /* FIXME: support other node types with 'attributes' like DTD */
3094 if (This
->is_empty_element
) {
3095 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
3096 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
3099 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
3101 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
3102 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
3105 This
->chunk_read_off
= 0;
3106 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
/* IXmlReader::GetQualifiedName: reports the qualified name of the current
   node through *name / *len.
   By node type: Text, CDATA, Comment and Whitespace share one case (its result
   is not in this listing). Element and EndElement report the element's
   qname when it has a prefix and its localname otherwise. Attribute
   distinguishes default namespace definitions, prefixed attributes (cached
   qualified-name string) and unprefixed ones (attribute localname). The
   remaining path reports the cached qualified-name string. */
3111 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3113 xmlreader
*This
= impl_from_IXmlReader(iface
);
3114 struct attribute
*attribute
= This
->attr
;
3115 struct element
*element
;
3118 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3123 switch (reader_get_nodetype(This
))
3125 case XmlNodeType_Text
:
3126 case XmlNodeType_CDATA
:
3127 case XmlNodeType_Comment
:
3128 case XmlNodeType_Whitespace
:
3132 case XmlNodeType_Element
:
3133 case XmlNodeType_EndElement
:
3134 element
= reader_get_element(This
);
3135 if (element
->prefix
.len
)
3137 *name
= element
->qname
.str
;
3138 *len
= element
->qname
.len
;
3142 *name
= element
->localname
.str
;
3143 *len
= element
->localname
.len
;
3146 case XmlNodeType_Attribute
:
3147 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3151 } else if (attribute
->prefix
.len
)
3153 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3154 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3158 *name
= attribute
->localname
.str
;
3159 *len
= attribute
->localname
.len
;
3163 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3164 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
/* Returns the entry at the head of the reader's default-namespace list
   (reader->nsdef). The empty-list case is checked first; its return value is
   not part of this listing. */
3171 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3173 if (list_empty(&reader
->nsdef
))
3176 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
/* IXmlReader::GetNamespaceUri: reports the namespace URI of the current node
   through *uri / *len.
   Attribute nodes delegate to reader_get_attribute_ns_uri(). Element and
   EndElement nodes look the cached prefix up with reader_lookup_ns() and can
   fall back to the top default namespace from reader_lookup_nsdef(). Text,
   CDATA, ProcessingInstruction, Comment, Whitespace and XmlDeclaration share
   one case. Other node types are reported with a FIXME. The stores to the
   out parameters and the returns are not part of this listing. */
3179 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3181 xmlreader
*This
= impl_from_IXmlReader(iface
);
3182 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3183 XmlNodeType nodetype
;
3187 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3192 switch ((nodetype
= reader_get_nodetype(This
)))
3194 case XmlNodeType_Attribute
:
3195 reader_get_attribute_ns_uri(This
, This
->attr
, uri
, len
);
3197 case XmlNodeType_Element
:
3198 case XmlNodeType_EndElement
:
3200 ns
= reader_lookup_ns(This
, prefix
);
3202 /* pick top default ns if any */
3204 ns
= reader_lookup_nsdef(This
);
3216 case XmlNodeType_Text
:
3217 case XmlNodeType_CDATA
:
3218 case XmlNodeType_ProcessingInstruction
:
3219 case XmlNodeType_Comment
:
3220 case XmlNodeType_Whitespace
:
3221 case XmlNodeType_XmlDeclaration
:
3226 FIXME("Unhandled node type %d\n", nodetype
);
/* IXmlReader::GetLocalName: reports the local name of the current node
   through *name / *len.
   Text, CDATA, Comment and Whitespace share one case (its result is not in
   this listing). Element and EndElement report the element's localname.
   Attribute delegates to reader_get_attribute_local_name(). The remaining
   path reports the cached local-name string. */
3235 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3237 xmlreader
*This
= impl_from_IXmlReader(iface
);
3238 struct element
*element
;
3241 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3246 switch (reader_get_nodetype(This
))
3248 case XmlNodeType_Text
:
3249 case XmlNodeType_CDATA
:
3250 case XmlNodeType_Comment
:
3251 case XmlNodeType_Whitespace
:
3255 case XmlNodeType_Element
:
3256 case XmlNodeType_EndElement
:
3257 element
= reader_get_element(This
);
3258 *name
= element
->localname
.str
;
3259 *len
= element
->localname
.len
;
3261 case XmlNodeType_Attribute
:
3262 reader_get_attribute_local_name(This
, This
->attr
, name
, len
);
3265 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3266 *len
= This
->strvalues
[StringValue_LocalName
].len
;
/* IXmlReader::GetPrefix: reports the namespace prefix of the current node
   through *ret / *len.
   For Element, EndElement and Attribute nodes the cached prefix is compared
   against the reserved "xml" and "xmlns" prefixes first (their results are
   not in this listing). Otherwise, when reader_lookup_ns() finds a matching
   namespace entry, that entry's prefix string is reported. */
3273 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3275 xmlreader
*This
= impl_from_IXmlReader(iface
);
3276 XmlNodeType nodetype
;
3279 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3287 switch ((nodetype
= reader_get_nodetype(This
)))
3289 case XmlNodeType_Element
:
3290 case XmlNodeType_EndElement
:
3291 case XmlNodeType_Attribute
:
3293 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3296 if (strval_eq(This
, prefix
, &strval_xml
))
3301 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3306 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3308 *ret
= ns
->prefix
.str
;
3309 *len
= ns
->prefix
.len
;
/* Returns the string value of the current node.
   XmlDeclaration, EndElement and None nodes always yield the empty string.
   For attributes that define a namespace the value comes from the namespace
   lists (reader_lookup_ns() by local name, falling back to
   reader_lookup_nsdef()); other attributes return their own value.
   For the remaining node types the cached value slot is used. When it has no
   string pointer yet and ensure_allocated is set, a buffer of len+1
   characters is allocated and filled from the input; NULL is returned if
   that allocation fails. The termination and storing of the new buffer are
   not part of this listing. */
3321 static const strval
*reader_get_value(xmlreader
*reader
, BOOL ensure_allocated
)
3325 switch (reader_get_nodetype(reader
))
3327 case XmlNodeType_XmlDeclaration
:
3328 case XmlNodeType_EndElement
:
3329 case XmlNodeType_None
:
3330 return &strval_empty
;
3331 case XmlNodeType_Attribute
:
3332 /* For namespace definition attributes return values from namespace list */
3333 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3337 if (!(ns
= reader_lookup_ns(reader
, &reader
->attr
->localname
)))
3338 ns
= reader_lookup_nsdef(reader
);
3342 return &reader
->attr
->value
;
3347 val
= &reader
->strvalues
[StringValue_Value
];
3348 if (!val
->str
&& ensure_allocated
)
3350 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3351 if (!ptr
) return NULL
;
3352 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
/* IXmlReader::GetValue: returns a pointer to the value of the current node
   and, optionally, its length.
   When the node is a Comment whose value has not been captured yet, or the
   reader is pending, IXmlReader_Read() is called first; a failure is returned
   as is and E_PENDING is returned if the reader is still pending.
   The value is then fetched with allocation enforced (E_OUTOFMEMORY on
   failure). The returned pointer and length skip whatever part was already
   consumed by chunked reads, and chunk_read_off is stored negated, which
   ReadValueChunk treats as "chunked reads no longer possible". */
3360 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3362 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3363 const strval
*val
= &reader
->strvalues
[StringValue_Value
];
3366 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3370 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3375 hr
= IXmlReader_Read(iface
, &type
);
3376 if (FAILED(hr
)) return hr
;
3378 /* return if still pending, partially read values are not reported */
3379 if (is_reader_pending(reader
)) return E_PENDING
;
3382 val
= reader_get_value(reader
, TRUE
);
3384 return E_OUTOFMEMORY
;
3386 off
= abs(reader
->chunk_read_off
);
3387 assert(off
<= val
->len
);
3388 *value
= val
->str
+ off
;
3389 if (len
) *len
= val
->len
- off
;
3390 reader
->chunk_read_off
= -off
;
/* IXmlReader::ReadValueChunk: copies the next part of the current node's
   value into buffer, at most chunk_size characters.
   The value is fetched without forcing allocation. While chunk_read_off is
   non-negative the chunk length is the smaller of the remaining value length
   and chunk_size. The number of characters is reported through *read when
   read is non-NULL, the data is copied from the current offset and the offset
   advances by that amount. Returns S_OK when something was copied or when
   chunk_size is zero, and S_FALSE otherwise. */
3394 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3396 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3400 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3402 val
= reader_get_value(reader
, FALSE
);
3404 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3405 if (reader
->chunk_read_off
>= 0)
3407 assert(reader
->chunk_read_off
<= val
->len
);
3408 len
= min(val
->len
- reader
->chunk_read_off
, chunk_size
);
3410 if (read
) *read
= len
;
3414 memcpy(buffer
, reader_get_strptr(reader
, val
) + reader
->chunk_read_off
, len
*sizeof(WCHAR
));
3415 reader
->chunk_read_off
+= len
;
3418 return len
|| !chunk_size
? S_OK
: S_FALSE
;
/* IXmlReader::GetBaseUri: not implemented; the call is only logged through
   FIXME as a stub. The return value is not part of this listing. */
3421 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3423 UINT
*baseUri_length
)
3425 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
/* IXmlReader::IsDefault: not implemented; the call is only logged through
   FIXME as a stub. The return value is not part of this listing. */
3429 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3431 FIXME("(%p): stub\n", iface
);
3435 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3437 xmlreader
*This
= impl_from_IXmlReader(iface
);
3438 TRACE("(%p)\n", This
);
3439 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3440 when current node is start tag of an element */
3441 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
/* IXmlReader::GetLineNumber: stores the line number of the current node in
   *line_number. Has an E_INVALIDARG return (presumably for a NULL
   line_number; the check is not in this listing).
   The source of the position depends on the node type: Element and EndElement
   use the current element's position, Attribute uses the current attribute's
   position, Whitespace and XmlDeclaration use the position kept in
   This->empty_element, and the remaining path uses the reader's running
   position. Returns S_FALSE when the reader is Closed and S_OK otherwise. */
3444 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3446 xmlreader
*This
= impl_from_IXmlReader(iface
);
3447 const struct element
*element
;
3449 TRACE("(%p %p)\n", This
, line_number
);
3452 return E_INVALIDARG
;
3454 switch (reader_get_nodetype(This
))
3456 case XmlNodeType_Element
:
3457 case XmlNodeType_EndElement
:
3458 element
= reader_get_element(This
);
3459 *line_number
= element
->position
.line_number
;
3461 case XmlNodeType_Attribute
:
3462 *line_number
= This
->attr
->position
.line_number
;
3464 case XmlNodeType_Whitespace
:
3465 case XmlNodeType_XmlDeclaration
:
3466 *line_number
= This
->empty_element
.position
.line_number
;
3469 *line_number
= This
->position
.line_number
;
3473 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* IXmlReader::GetLinePosition: stores the column of the current node in
   *line_position. Has an E_INVALIDARG return (presumably for a NULL
   line_position; the check is not in this listing).
   The source of the position depends on the node type: Element and EndElement
   use the current element's position, Attribute uses the current attribute's
   position, Whitespace and XmlDeclaration use the position kept in
   This->empty_element, and the remaining path uses the reader's running
   position. Returns S_FALSE when the reader is Closed and S_OK otherwise. */
3476 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3478 xmlreader
*This
= impl_from_IXmlReader(iface
);
3479 const struct element
*element
;
3481 TRACE("(%p %p)\n", This
, line_position
);
3484 return E_INVALIDARG
;
3486 switch (reader_get_nodetype(This
))
3488 case XmlNodeType_Element
:
3489 case XmlNodeType_EndElement
:
3490 element
= reader_get_element(This
);
3491 *line_position
= element
->position
.line_position
;
3493 case XmlNodeType_Attribute
:
3494 *line_position
= This
->attr
->position
.line_position
;
3496 case XmlNodeType_Whitespace
:
3497 case XmlNodeType_XmlDeclaration
:
3498 *line_position
= This
->empty_element
.position
.line_position
;
3501 *line_position
= This
->position
.line_position
;
3505 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* IXmlReader::GetAttributeCount: stores the number of attributes of the
   current node (This->attr_count) in *count. Returns E_INVALIDARG when count
   is NULL. */
3508 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3510 xmlreader
*This
= impl_from_IXmlReader(iface
);
3512 TRACE("(%p)->(%p)\n", This
, count
);
3514 if (!count
) return E_INVALIDARG
;
3516 *count
= This
->attr_count
;
3520 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3522 xmlreader
*This
= impl_from_IXmlReader(iface
);
3523 TRACE("(%p)->(%p)\n", This
, depth
);
3524 *depth
= This
->depth
;
3528 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3530 xmlreader
*This
= impl_from_IXmlReader(iface
);
3531 TRACE("(%p)\n", iface
);
3532 return This
->state
== XmlReadState_EndOfFile
;
/* Method table of the IXmlReader implementation. Entries must stay in the
   order of the IXmlReader interface declaration.
   NOTE(review): some slots are not part of this listing (gaps in the original
   line numbers), so this is not the complete initializer. */
3535 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3537 xmlreader_QueryInterface
,
3541 xmlreader_GetProperty
,
3542 xmlreader_SetProperty
,
3544 xmlreader_GetNodeType
,
3545 xmlreader_MoveToFirstAttribute
,
3546 xmlreader_MoveToNextAttribute
,
3547 xmlreader_MoveToAttributeByName
,
3548 xmlreader_MoveToElement
,
3549 xmlreader_GetQualifiedName
,
3550 xmlreader_GetNamespaceUri
,
3551 xmlreader_GetLocalName
,
3552 xmlreader_GetPrefix
,
3554 xmlreader_ReadValueChunk
,
3555 xmlreader_GetBaseUri
,
3556 xmlreader_IsDefault
,
3557 xmlreader_IsEmptyElement
,
3558 xmlreader_GetLineNumber
,
3559 xmlreader_GetLinePosition
,
3560 xmlreader_GetAttributeCount
,
3565 /** IXmlReaderInput **/
/* IUnknown::QueryInterface for the reader input object.
   The visible checks accept IID_IXmlReaderInput and IID_IUnknown; any other
   riid is reported with a WARN and answered with E_NOINTERFACE.
   IUnknown_AddRef() is called on iface (presumably on the success path only;
   the lines between are not part of this listing). */
3566 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3568 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3570 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3572 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3573 IsEqualGUID(riid
, &IID_IUnknown
))
3579 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3581 return E_NOINTERFACE
;
3584 IUnknown_AddRef(iface
);
/* IUnknown::AddRef for the reader input: atomically increments the object's
   reference counter with InterlockedIncrement() and traces the new value. */
3589 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3591 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3592 ULONG ref
= InterlockedIncrement(&This
->ref
);
3593 TRACE("(%p)->(%d)\n", This
, ref
);
/* IUnknown::Release for the reader input: atomically decrements the
   reference counter and traces the result. The teardown sequence below
   (presumably guarded by the counter reaching zero; the guard is not in this
   listing) releases the wrapped input object and the stream when set, frees
   the input buffer, the base URI string and finally the object itself. */
3597 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3599 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3600 LONG ref
= InterlockedDecrement(&This
->ref
);
3602 TRACE("(%p)->(%d)\n", This
, ref
);
/* keep a local copy of the allocator: This is freed before it is released */
3606 IMalloc
*imalloc
= This
->imalloc
;
3607 if (This
->input
) IUnknown_Release(This
->input
);
3608 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3609 if (This
->buffer
) free_input_buffer(This
->buffer
);
3610 readerinput_free(This
, This
->baseuri
);
3611 readerinput_free(This
, This
);
3612 if (imalloc
) IMalloc_Release(imalloc
);
/* Method table of the built-in IXmlReaderInput implementation; it exposes
   only the three IUnknown methods. xmlreader_SetInput() compares an object's
   lpVtbl against this table to recognise the built-in implementation. */
3618 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3620 xmlreaderinput_QueryInterface
,
3621 xmlreaderinput_AddRef
,
3622 xmlreaderinput_Release
/* Creates a new reader object and returns the interface identified by riid
   in *obj.
   The object is allocated either through the supplied IMalloc or with
   heap_alloc() (presumably chosen by whether imalloc is NULL; the condition is
   not in this listing); E_OUTOFMEMORY is returned when allocation fails.
   The object is zero-filled and then given its defaults: read state Closed,
   internal state and resume state Initial, DTD processing prohibited, node
   type None, maximum element depth 256, empty attribute/namespace/element
   lists and empty string values. A reference is taken on imalloc when one was
   supplied. The requested interface is obtained with QueryInterface, after
   which IXmlReader_Release() is called on the new object. */
3625 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3631 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3634 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3636 reader
= heap_alloc(sizeof(*reader
));
3638 return E_OUTOFMEMORY
;
3640 memset(reader
, 0, sizeof(*reader
));
3641 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3643 reader
->state
= XmlReadState_Closed
;
3644 reader
->instate
= XmlReadInState_Initial
;
3645 reader
->resumestate
= XmlReadResumeState_Initial
;
3646 reader
->dtdmode
= DtdProcessing_Prohibit
;
3647 reader
->imalloc
= imalloc
;
3648 if (imalloc
) IMalloc_AddRef(imalloc
);
3649 reader
->nodetype
= XmlNodeType_None
;
3650 list_init(&reader
->attrs
);
3651 list_init(&reader
->nsdef
);
3652 list_init(&reader
->ns
);
3653 list_init(&reader
->elements
);
3654 reader
->max_depth
= 256;
3656 reader
->chunk_read_off
= 0;
3657 for (i
= 0; i
< StringValue_Last
; i
++)
3658 reader
->strvalues
[i
] = strval_empty
;
3660 hr
= IXmlReader_QueryInterface(&reader
->IXmlReader_iface
, riid
, obj
);
3661 IXmlReader_Release(&reader
->IXmlReader_iface
);
3663 TRACE("returning iface %p, hr %#x\n", *obj
, hr
);
3668 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3673 IXmlReaderInput
**ppInput
)
3675 xmlreaderinput
*readerinput
;
3678 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3679 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3681 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3684 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3686 readerinput
= heap_alloc(sizeof(*readerinput
));
3687 if(!readerinput
) return E_OUTOFMEMORY
;
3689 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3690 readerinput
->ref
= 1;
3691 readerinput
->imalloc
= imalloc
;
3692 readerinput
->stream
= NULL
;
3693 if (imalloc
) IMalloc_AddRef(imalloc
);
3694 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3695 readerinput
->hint
= hint
;
3696 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3697 readerinput
->pending
= 0;
3699 hr
= alloc_input_buffer(readerinput
);
3702 readerinput_free(readerinput
, readerinput
->baseuri
);
3703 readerinput_free(readerinput
, readerinput
);
3704 if (imalloc
) IMalloc_Release(imalloc
);
3707 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3709 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3711 TRACE("returning iface %p\n", *ppInput
);