2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
/* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW
[] = {'\"',0};
90 static const WCHAR quoteW
[] = {'\'',0};
91 static const WCHAR ltW
[] = {'<',0};
92 static const WCHAR gtW
[] = {'>',0};
93 static const WCHAR commentW
[] = {'<','!','-','-',0};
94 static const WCHAR piW
[] = {'<','?',0};
96 static BOOL
is_namestartchar(WCHAR ch
);
98 static const char *debugstr_nodetype(XmlNodeType nodetype
)
100 static const char * const type_names
[] =
109 "ProcessingInstruction",
122 if (nodetype
> _XmlNodeType_Last
)
123 return wine_dbg_sprintf("unknown type=%d", nodetype
);
125 return type_names
[nodetype
];
128 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
130 static const char * const prop_names
[] =
142 if (prop
> _XmlReaderProperty_Last
)
143 return wine_dbg_sprintf("unknown property=%d", prop
);
145 return prop_names
[prop
];
148 struct xml_encoding_data
155 static const struct xml_encoding_data xml_encoding_map
[] = {
156 { utf16W
, XmlEncoding_UTF16
, ~0 },
157 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
160 const WCHAR
*get_encoding_name(xml_encoding encoding
)
162 return xml_encoding_map
[encoding
].name
;
165 xml_encoding
get_encoding_from_codepage(UINT codepage
)
168 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
170 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
172 return XmlEncoding_Unknown
;
179 unsigned int allocated
;
180 unsigned int written
;
183 typedef struct input_buffer input_buffer
;
187 IXmlReaderInput IXmlReaderInput_iface
;
189 /* reference passed on IXmlReaderInput creation, is kept when input is created */
192 xml_encoding encoding
;
    /* stream reference set after SetInput() call from reader,
       stored as a sequential stream, because optimizations possible
       with IStream aren't currently implemented */
198 ISequentialStream
*stream
;
199 input_buffer
*buffer
;
200 unsigned int pending
: 1;
203 static const struct IUnknownVtbl xmlreaderinputvtbl
;
205 /* Structure to hold parsed string of specific length.
207 Reader stores node value as 'start' pointer, on request
208 a null-terminated version of it is allocated.
210 To init a strval variable use reader_init_strval(),
211 to set strval as a reader value use reader_set_strval().
215 WCHAR
*str
; /* allocated null-terminated string */
216 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
217 UINT start
; /* input position where value starts */
220 static WCHAR emptyW
[] = {0};
221 static WCHAR xmlW
[] = {'x','m','l',0};
222 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
223 static const strval strval_empty
= { emptyW
};
224 static const strval strval_xml
= { xmlW
, 3 };
225 static const strval strval_xmlns
= { xmlnsW
, 5 };
249 struct element
*element
;
254 IXmlReader IXmlReader_iface
;
256 xmlreaderinput
*input
;
259 XmlReaderInternalState instate
;
260 XmlReaderResumeState resumestate
;
261 XmlNodeType nodetype
;
262 DtdProcessing dtdmode
;
263 IXmlResolver
*resolver
;
265 UINT line
, pos
; /* reader position in XML stream */
266 struct list attrs
; /* attributes list for current node */
267 struct attribute
*attr
; /* current attribute */
271 struct list elements
;
272 strval strvalues
[StringValue_Last
];
275 BOOL is_empty_element
;
276 struct element empty_element
;
277 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
282 encoded_buffer utf16
;
283 encoded_buffer encoded
;
285 xmlreaderinput
*input
;
288 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
290 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
293 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
295 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
298 /* reader memory allocation functions */
299 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
301 return m_alloc(reader
->imalloc
, len
);
304 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
306 void *ret
= reader_alloc(reader
, len
);
312 static inline void reader_free(xmlreader
*reader
, void *mem
)
314 m_free(reader
->imalloc
, mem
);
317 /* Just return pointer from offset, no attempt to read more. */
318 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
320 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
321 return (WCHAR
*)buffer
->data
+ offset
;
324 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
326 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
329 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
333 if (src
->str
!= strval_empty
.str
)
335 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
336 if (!dest
->str
) return E_OUTOFMEMORY
;
337 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
338 dest
->str
[dest
->len
] = 0;
345 /* reader input memory allocation functions */
346 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
348 return m_alloc(input
->imalloc
, len
);
351 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
353 return m_realloc(input
->imalloc
, mem
, len
);
356 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
358 m_free(input
->imalloc
, mem
);
361 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
368 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
369 ret
= readerinput_alloc(input
, size
);
370 if (ret
) memcpy(ret
, str
, size
);
376 static void reader_clear_attrs(xmlreader
*reader
)
378 struct attribute
*attr
, *attr2
;
379 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
381 reader_free(reader
, attr
);
383 list_init(&reader
->attrs
);
384 reader
->attr_count
= 0;
388 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
389 while we are on a node with attributes */
390 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
393 struct attribute
*attr
;
395 attr
= reader_alloc(reader
, sizeof(*attr
));
396 if (!attr
) return E_OUTOFMEMORY
;
399 attr
->prefix
= *prefix
;
401 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
402 attr
->localname
= *localname
;
403 attr
->qname
= qname
? *qname
: *localname
;
404 attr
->value
= *value
;
405 list_add_tail(&reader
->attrs
, &attr
->entry
);
406 reader
->attr_count
++;
411 /* This one frees stored string value if needed */
412 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
414 if (v
->str
!= strval_empty
.str
)
416 reader_free(reader
, v
->str
);
421 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
428 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
430 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
433 /* used to initialize from constant string */
434 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
441 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
443 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
446 static void reader_free_strvalues(xmlreader
*reader
)
449 for (type
= 0; type
< StringValue_Last
; type
++)
450 reader_free_strvalue(reader
, type
);
453 /* This helper should only be used to test if strings are the same,
454 it doesn't try to sort. */
455 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
457 if (str1
->len
!= str2
->len
) return 0;
458 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
461 static void reader_clear_elements(xmlreader
*reader
)
463 struct element
*elem
, *elem2
;
464 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
466 reader_free_strvalued(reader
, &elem
->prefix
);
467 reader_free_strvalued(reader
, &elem
->localname
);
468 reader_free_strvalued(reader
, &elem
->qname
);
469 reader_free(reader
, elem
);
471 list_init(&reader
->elements
);
472 reader
->is_empty_element
= FALSE
;
475 static HRESULT
reader_inc_depth(xmlreader
*reader
)
477 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
480 static void reader_dec_depth(xmlreader
*reader
)
486 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
491 ns
= reader_alloc(reader
, sizeof(*ns
));
492 if (!ns
) return E_OUTOFMEMORY
;
495 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
497 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
499 reader_free(reader
, ns
);
504 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
506 reader_free_strvalued(reader
, &ns
->prefix
);
507 reader_free(reader
, ns
);
512 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
516 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
518 reader_free_strvalued(reader
, &element
->prefix
);
519 reader_free_strvalued(reader
, &element
->localname
);
520 reader_free_strvalued(reader
, &element
->qname
);
521 reader_free(reader
, element
);
524 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
528 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
531 ns
->element
= element
;
534 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
537 ns
->element
= element
;
541 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
544 struct element
*element
;
547 element
= reader_alloc_zero(reader
, sizeof(*element
));
549 return E_OUTOFMEMORY
;
551 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
552 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
553 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
555 list_add_head(&reader
->elements
, &element
->entry
);
556 reader_mark_ns_nodes(reader
, element
);
557 reader
->is_empty_element
= FALSE
;
560 reader_free_element(reader
, element
);
565 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
569 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
570 if (ns
->element
!= element
)
573 list_remove(&ns
->entry
);
574 reader_free_strvalued(reader
, &ns
->prefix
);
575 reader_free_strvalued(reader
, &ns
->uri
);
576 reader_free(reader
, ns
);
579 if (!list_empty(&reader
->nsdef
)) {
580 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
581 if (ns
->element
== element
) {
582 list_remove(&ns
->entry
);
583 reader_free_strvalued(reader
, &ns
->prefix
);
584 reader_free_strvalued(reader
, &ns
->uri
);
585 reader_free(reader
, ns
);
590 static void reader_pop_element(xmlreader
*reader
)
592 struct element
*element
;
594 if (list_empty(&reader
->elements
))
597 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
598 list_remove(&element
->entry
);
600 reader_pop_ns_nodes(reader
, element
);
601 reader_free_element(reader
, element
);
603 /* It was a root element, the rest is expected as Misc */
604 if (list_empty(&reader
->elements
))
605 reader
->instate
= XmlReadInState_MiscEnd
;
/* Always make a copy, because strings are supposed to be null terminated. A NULL pointer for 'value'
   means the node value is to be determined. */
610 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
612 strval
*v
= &reader
->strvalues
[type
];
614 reader_free_strvalue(reader
, type
);
623 if (value
->str
== strval_empty
.str
)
627 if (type
== StringValue_Value
)
629 /* defer allocation for value string */
631 v
->start
= value
->start
;
636 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
637 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
638 v
->str
[value
->len
] = 0;
644 static inline int is_reader_pending(xmlreader
*reader
)
646 return reader
->input
->pending
;
649 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
651 const int initial_len
= 0x2000;
652 buffer
->data
= readerinput_alloc(input
, initial_len
);
653 if (!buffer
->data
) return E_OUTOFMEMORY
;
655 memset(buffer
->data
, 0, 4);
657 buffer
->allocated
= initial_len
;
663 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
665 readerinput_free(input
, buffer
->data
);
668 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
670 if (encoding
== XmlEncoding_Unknown
)
672 FIXME("unsupported encoding %d\n", encoding
);
676 *cp
= xml_encoding_map
[encoding
].cp
;
681 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
685 if (!name
) return XmlEncoding_Unknown
;
688 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
695 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
697 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
699 return xml_encoding_map
[n
].enc
;
707 return XmlEncoding_Unknown
;
710 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
712 input_buffer
*buffer
;
715 input
->buffer
= NULL
;
717 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
718 if (!buffer
) return E_OUTOFMEMORY
;
720 buffer
->input
= input
;
721 buffer
->code_page
= ~0; /* code page is unknown at this point */
722 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
724 readerinput_free(input
, buffer
);
728 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
730 free_encoded_buffer(input
, &buffer
->utf16
);
731 readerinput_free(input
, buffer
);
735 input
->buffer
= buffer
;
739 static void free_input_buffer(input_buffer
*buffer
)
741 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
742 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
743 readerinput_free(buffer
->input
, buffer
);
746 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
748 if (readerinput
->stream
) {
749 ISequentialStream_Release(readerinput
->stream
);
750 readerinput
->stream
= NULL
;
754 /* Queries already stored interface for IStream/ISequentialStream.
755 Interface supplied on creation will be overwritten */
756 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
760 readerinput_release_stream(readerinput
);
761 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
763 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
768 /* reads a chunk to raw buffer */
769 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
771 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
772 /* to make sure aligned length won't exceed allocated length */
773 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
777 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
778 variable width encodings like UTF-8 */
779 len
= (len
+ 3) & ~3;
780 /* try to use allocated space or grow */
781 if (buffer
->allocated
- buffer
->written
< len
)
783 buffer
->allocated
*= 2;
784 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
785 len
= buffer
->allocated
- buffer
->written
;
789 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
790 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
791 readerinput
->pending
= hr
== E_PENDING
;
792 if (FAILED(hr
)) return hr
;
793 buffer
->written
+= read
;
798 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
799 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
801 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
803 length
*= sizeof(WCHAR
);
804 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
805 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
807 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
808 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
809 buffer
->allocated
= grown_size
;
813 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
815 static const char startA
[] = {'<','?'};
816 static const char commentA
[] = {'<','!'};
817 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
818 unsigned char *ptr
= (unsigned char*)buffer
->data
;
820 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
821 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
822 /* test start byte */
825 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
826 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
827 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
828 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
832 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
834 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
835 static const char utf8bom
[] = {0xef,0xbb,0xbf};
836 static const char utf16lebom
[] = {0xff,0xfe};
839 *enc
= XmlEncoding_Unknown
;
841 if (buffer
->written
<= 3)
843 HRESULT hr
= readerinput_growraw(readerinput
);
844 if (FAILED(hr
)) return hr
;
845 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
848 ptrW
= (WCHAR
*)buffer
->data
;
849 /* try start symbols if we have enough data to do that, input buffer should contain
850 first chunk already */
851 if (readerinput_is_utf8(readerinput
))
852 *enc
= XmlEncoding_UTF8
;
853 else if (*ptrW
== '<')
856 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
857 *enc
= XmlEncoding_UTF16
;
859 /* try with BOM now */
860 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
862 buffer
->cur
+= sizeof(utf8bom
);
863 *enc
= XmlEncoding_UTF8
;
865 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
867 buffer
->cur
+= sizeof(utf16lebom
);
868 *enc
= XmlEncoding_UTF16
;
874 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
876 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
877 int len
= buffer
->written
;
879 /* complete single byte char */
880 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
882 /* find start byte of multibyte char */
883 while (--len
&& !(buffer
->data
[len
] & 0xc0))
889 /* Returns byte length of complete char sequence for buffer code page,
890 it's relative to current buffer position which is currently used for BOM handling
892 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
894 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
897 if (readerinput
->buffer
->code_page
== CP_UTF8
)
898 len
= readerinput_get_utf8_convlen(readerinput
);
900 len
= buffer
->written
;
902 TRACE("%d\n", len
- buffer
->cur
);
903 return len
- buffer
->cur
;
906 /* It's possible that raw buffer has some leftovers from last conversion - some char
907 sequence that doesn't represent a full code point. Length argument should be calculated with
908 readerinput_get_convlen(), if it's -1 it will be calculated here. */
909 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
911 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
914 len
= readerinput_get_convlen(readerinput
);
916 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
917 /* everything below cur is lost too */
918 buffer
->written
-= len
+ buffer
->cur
;
919 /* after this point we don't need cur offset really,
920 it's used only to mark where actual data begins when first chunk is read */
924 /* note that raw buffer content is kept */
925 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
927 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
928 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
934 hr
= get_code_page(enc
, &cp
);
935 if (FAILED(hr
)) return;
937 readerinput
->buffer
->code_page
= cp
;
938 len
= readerinput_get_convlen(readerinput
);
940 TRACE("switching to cp %d\n", cp
);
942 /* just copy in this case */
943 if (enc
== XmlEncoding_UTF16
)
945 readerinput_grow(readerinput
, len
);
946 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
947 dest
->written
+= len
*sizeof(WCHAR
);
951 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
952 readerinput_grow(readerinput
, dest_len
);
953 ptr
= (WCHAR
*)dest
->data
;
954 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
956 dest
->written
+= dest_len
*sizeof(WCHAR
);
959 /* shrinks parsed data a buffer begins with */
960 static void reader_shrink(xmlreader
*reader
)
962 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
964 /* avoid to move too often using threshold shrink length */
965 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
967 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
968 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
970 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
974 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
975 It won't attempt to shrink but will grow destination buffer if needed */
976 static HRESULT
reader_more(xmlreader
*reader
)
978 xmlreaderinput
*readerinput
= reader
->input
;
979 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
980 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
981 UINT cp
= readerinput
->buffer
->code_page
;
986 /* get some raw data from stream first */
987 hr
= readerinput_growraw(readerinput
);
988 len
= readerinput_get_convlen(readerinput
);
990 /* just copy for UTF-16 case */
993 readerinput_grow(readerinput
, len
);
994 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
995 dest
->written
+= len
*sizeof(WCHAR
);
999 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1000 readerinput_grow(readerinput
, dest_len
);
1001 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1002 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1004 dest
->written
+= dest_len
*sizeof(WCHAR
);
1005 /* get rid of processed data */
1006 readerinput_shrinkraw(readerinput
, len
);
1011 static inline UINT
reader_get_cur(xmlreader
*reader
)
1013 return reader
->input
->buffer
->utf16
.cur
;
1016 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1018 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1019 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1020 if (!*ptr
) reader_more(reader
);
1021 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1024 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1027 const WCHAR
*ptr
= reader_get_ptr(reader
);
1032 reader_more(reader
);
1033 ptr
= reader_get_ptr(reader
);
1035 if (str
[i
] != ptr
[i
])
1036 return ptr
[i
] - str
[i
];
1042 /* moves cursor n WCHARs forward */
1043 static void reader_skipn(xmlreader
*reader
, int n
)
1045 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1046 const WCHAR
*ptr
= reader_get_ptr(reader
);
1048 while (*ptr
++ && n
--)
1055 static inline BOOL
is_wchar_space(WCHAR ch
)
1057 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1060 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1061 static int reader_skipspaces(xmlreader
*reader
)
1063 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1064 const WCHAR
*ptr
= reader_get_ptr(reader
);
1065 UINT start
= reader_get_cur(reader
);
1067 while (is_wchar_space(*ptr
))
1071 else if (*ptr
== '\n')
1080 ptr
= reader_get_ptr(reader
);
1083 return reader_get_cur(reader
) - start
;
1086 /* [26] VersionNum ::= '1.' [0-9]+ */
1087 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1089 static const WCHAR onedotW
[] = {'1','.',0};
1093 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1095 start
= reader_get_cur(reader
);
1097 reader_skipn(reader
, 2);
1099 ptr2
= ptr
= reader_get_ptr(reader
);
1100 while (*ptr
>= '0' && *ptr
<= '9')
1102 reader_skipn(reader
, 1);
1103 ptr
= reader_get_ptr(reader
);
1106 if (ptr2
== ptr
) return WC_E_DIGIT
;
1107 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1108 TRACE("version=%s\n", debug_strval(reader
, val
));
1112 /* [25] Eq ::= S? '=' S? */
1113 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1115 static const WCHAR eqW
[] = {'=',0};
1116 reader_skipspaces(reader
);
1117 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1119 reader_skipn(reader
, 1);
1120 reader_skipspaces(reader
);
1124 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1125 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1127 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1131 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1133 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1134 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1135 /* skip 'version' */
1136 reader_skipn(reader
, 7);
1138 hr
= reader_parse_eq(reader
);
1139 if (FAILED(hr
)) return hr
;
1141 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1144 reader_skipn(reader
, 1);
1146 hr
= reader_parse_versionnum(reader
, &val
);
1147 if (FAILED(hr
)) return hr
;
1149 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1153 reader_skipn(reader
, 1);
1155 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
);
1158 /* ([A-Za-z0-9._] | '-') */
1159 static inline BOOL
is_wchar_encname(WCHAR ch
)
1161 return ((ch
>= 'A' && ch
<= 'Z') ||
1162 (ch
>= 'a' && ch
<= 'z') ||
1163 (ch
>= '0' && ch
<= '9') ||
1164 (ch
== '.') || (ch
== '_') ||
1168 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1169 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1171 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1175 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1176 return WC_E_ENCNAME
;
1178 val
->start
= reader_get_cur(reader
);
1181 while (is_wchar_encname(*++ptr
))
1185 enc
= parse_encoding_name(start
, len
);
1186 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1190 if (enc
== XmlEncoding_Unknown
)
1191 return WC_E_ENCNAME
;
1193 /* skip encoding name */
1194 reader_skipn(reader
, len
);
1198 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1199 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1201 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1205 if (!reader_skipspaces(reader
)) return S_FALSE
;
1207 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1208 name
.str
= reader_get_ptr(reader
);
1209 name
.start
= reader_get_cur(reader
);
1211 /* skip 'encoding' */
1212 reader_skipn(reader
, 8);
1214 hr
= reader_parse_eq(reader
);
1215 if (FAILED(hr
)) return hr
;
1217 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1220 reader_skipn(reader
, 1);
1222 hr
= reader_parse_encname(reader
, &val
);
1223 if (FAILED(hr
)) return hr
;
1225 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1229 reader_skipn(reader
, 1);
1231 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
);
1234 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1235 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1237 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1238 static const WCHAR yesW
[] = {'y','e','s',0};
1239 static const WCHAR noW
[] = {'n','o',0};
1244 if (!reader_skipspaces(reader
)) return S_FALSE
;
1246 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1247 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1248 /* skip 'standalone' */
1249 reader_skipn(reader
, 10);
1251 hr
= reader_parse_eq(reader
);
1252 if (FAILED(hr
)) return hr
;
1254 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1257 reader_skipn(reader
, 1);
1259 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1260 return WC_E_XMLDECL
;
1262 start
= reader_get_cur(reader
);
1263 /* skip 'yes'|'no' */
1264 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1265 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1266 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1268 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1271 reader_skipn(reader
, 1);
1273 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
);
1276 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1277 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1279 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1280 static const WCHAR declcloseW
[] = {'?','>',0};
1283 /* check if we have "<?xml " */
1284 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1286 reader_skipn(reader
, 5);
1287 hr
= reader_parse_versioninfo(reader
);
1291 hr
= reader_parse_encdecl(reader
);
1295 hr
= reader_parse_sddecl(reader
);
1299 reader_skipspaces(reader
);
1300 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1301 reader_skipn(reader
, 2);
1303 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1304 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1305 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1306 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1311 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1312 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1317 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1319 start
= reader
->resume
[XmlReadResume_Body
];
1320 ptr
= reader_get_ptr(reader
);
1325 reader_skipn(reader
, 4);
1326 reader_shrink(reader
);
1327 ptr
= reader_get_ptr(reader
);
1328 start
= reader_get_cur(reader
);
1329 reader
->nodetype
= XmlNodeType_Comment
;
1330 reader
->resume
[XmlReadResume_Body
] = start
;
1331 reader
->resumestate
= XmlReadResumeState_Comment
;
1332 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1333 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1334 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1337 /* will exit when there's no more data, it won't attempt to
1338 read more from stream */
1349 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1350 TRACE("%s\n", debug_strval(reader
, &value
));
1352 /* skip rest of markup '->' */
1353 reader_skipn(reader
, 3);
1355 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1356 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1357 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1358 reader
->resume
[XmlReadResume_Body
] = 0;
1359 reader
->resumestate
= XmlReadResumeState_Initial
;
1363 return WC_E_COMMENT
;
1367 reader_skipn(reader
, 1);
1374 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1375 static inline BOOL
is_char(WCHAR ch
)
1377 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1378 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1379 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1380 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1381 (ch
>= 0xe000 && ch
<= 0xfffd);
1384 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1385 static inline BOOL
is_pubchar(WCHAR ch
)
1387 return (ch
== ' ') ||
1388 (ch
>= 'a' && ch
<= 'z') ||
1389 (ch
>= 'A' && ch
<= 'Z') ||
1390 (ch
>= '0' && ch
<= '9') ||
1391 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1392 (ch
== '=') || (ch
== '?') ||
1393 (ch
== '@') || (ch
== '!') ||
1394 (ch
>= '#' && ch
<= '%') || /* #$% */
1395 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1398 static inline BOOL
is_namestartchar(WCHAR ch
)
1400 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1401 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1402 (ch
>= 0xc0 && ch
<= 0xd6) ||
1403 (ch
>= 0xd8 && ch
<= 0xf6) ||
1404 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1405 (ch
>= 0x370 && ch
<= 0x37d) ||
1406 (ch
>= 0x37f && ch
<= 0x1fff) ||
1407 (ch
>= 0x200c && ch
<= 0x200d) ||
1408 (ch
>= 0x2070 && ch
<= 0x218f) ||
1409 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1410 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1411 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1412 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1413 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1414 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1417 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1418 static inline BOOL
is_ncnamechar(WCHAR ch
)
1420 return (ch
>= 'A' && ch
<= 'Z') ||
1421 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1422 (ch
== '-') || (ch
== '.') ||
1423 (ch
>= '0' && ch
<= '9') ||
1425 (ch
>= 0xc0 && ch
<= 0xd6) ||
1426 (ch
>= 0xd8 && ch
<= 0xf6) ||
1427 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1428 (ch
>= 0x300 && ch
<= 0x36f) ||
1429 (ch
>= 0x370 && ch
<= 0x37d) ||
1430 (ch
>= 0x37f && ch
<= 0x1fff) ||
1431 (ch
>= 0x200c && ch
<= 0x200d) ||
1432 (ch
>= 0x203f && ch
<= 0x2040) ||
1433 (ch
>= 0x2070 && ch
<= 0x218f) ||
1434 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1435 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1436 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1437 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1438 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1439 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1442 static inline BOOL
is_namechar(WCHAR ch
)
1444 return (ch
== ':') || is_ncnamechar(ch
);
1447 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1449 /* When we're on attribute always return attribute type, container node type is kept.
1450 Note that container is not necessarily an element, and attribute doesn't mean it's
1451 an attribute in XML spec terms. */
1452 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1455 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1456 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1457 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1458 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1459 [5] Name ::= NameStartChar (NameChar)* */
1460 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1465 if (reader
->resume
[XmlReadResume_Name
])
1467 start
= reader
->resume
[XmlReadResume_Name
];
1468 ptr
= reader_get_ptr(reader
);
1472 ptr
= reader_get_ptr(reader
);
1473 start
= reader_get_cur(reader
);
1474 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1477 while (is_namechar(*ptr
))
1479 reader_skipn(reader
, 1);
1480 ptr
= reader_get_ptr(reader
);
1483 if (is_reader_pending(reader
))
1485 reader
->resume
[XmlReadResume_Name
] = start
;
1489 reader
->resume
[XmlReadResume_Name
] = 0;
1491 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1492 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1497 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1498 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1500 static const WCHAR xmlW
[] = {'x','m','l'};
1501 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1507 hr
= reader_parse_name(reader
, &name
);
1508 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1510 /* now that we got name check for illegal content */
1511 if (strval_eq(reader
, &name
, &xmlval
))
1512 return WC_E_LEADINGXML
;
1514 /* PITarget can't be a qualified name */
1515 ptr
= reader_get_strptr(reader
, &name
);
1516 for (i
= 0; i
< name
.len
; i
++)
1518 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1520 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1525 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1526 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1533 switch (reader
->resumestate
)
1535 case XmlReadResumeState_Initial
:
1537 reader_skipn(reader
, 2);
1538 reader_shrink(reader
);
1539 reader
->resumestate
= XmlReadResumeState_PITarget
;
1540 case XmlReadResumeState_PITarget
:
1541 hr
= reader_parse_pitarget(reader
, &target
);
1542 if (FAILED(hr
)) return hr
;
1543 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1544 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1545 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1546 reader
->resumestate
= XmlReadResumeState_PIBody
;
1547 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1552 start
= reader
->resume
[XmlReadResume_Body
];
1553 ptr
= reader_get_ptr(reader
);
1560 UINT cur
= reader_get_cur(reader
);
1563 /* strip all leading whitespace chars */
1566 ptr
= reader_get_ptr2(reader
, start
);
1567 if (!is_wchar_space(*ptr
)) break;
1571 reader_init_strvalue(start
, cur
-start
, &value
);
1574 reader_skipn(reader
, 2);
1575 TRACE("%s\n", debug_strval(reader
, &value
));
1576 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1577 reader
->resumestate
= XmlReadResumeState_Initial
;
1578 reader
->resume
[XmlReadResume_Body
] = 0;
1579 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1584 reader_skipn(reader
, 1);
1585 ptr
= reader_get_ptr(reader
);
1591 /* This one is used to parse significant whitespace nodes, like in Misc production */
1592 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1594 switch (reader
->resumestate
)
1596 case XmlReadResumeState_Initial
:
1597 reader_shrink(reader
);
1598 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1599 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1600 reader
->nodetype
= XmlNodeType_Whitespace
;
1601 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1602 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1603 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1605 case XmlReadResumeState_Whitespace
:
1610 reader_skipspaces(reader
);
1611 if (is_reader_pending(reader
)) return S_OK
;
1613 start
= reader
->resume
[XmlReadResume_Body
];
1614 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1615 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1616 TRACE("%s\n", debug_strval(reader
, &value
));
1617 reader
->resumestate
= XmlReadResumeState_Initial
;
1626 /* [27] Misc ::= Comment | PI | S */
1627 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1629 HRESULT hr
= S_FALSE
;
1631 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1633 hr
= reader_more(reader
);
1634 if (FAILED(hr
)) return hr
;
1636 /* finish current node */
1637 switch (reader
->resumestate
)
1639 case XmlReadResumeState_PITarget
:
1640 case XmlReadResumeState_PIBody
:
1641 return reader_parse_pi(reader
);
1642 case XmlReadResumeState_Comment
:
1643 return reader_parse_comment(reader
);
1644 case XmlReadResumeState_Whitespace
:
1645 return reader_parse_whitespace(reader
);
1647 ERR("unknown resume state %d\n", reader
->resumestate
);
1653 const WCHAR
*cur
= reader_get_ptr(reader
);
1655 if (is_wchar_space(*cur
))
1656 hr
= reader_parse_whitespace(reader
);
1657 else if (!reader_cmp(reader
, commentW
))
1658 hr
= reader_parse_comment(reader
);
1659 else if (!reader_cmp(reader
, piW
))
1660 hr
= reader_parse_pi(reader
);
1664 if (hr
!= S_FALSE
) return hr
;
1670 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1671 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1673 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1676 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1679 reader_skipn(reader
, 1);
1681 cur
= reader_get_ptr(reader
);
1682 start
= reader_get_cur(reader
);
1683 while (is_char(*cur
) && *cur
!= quote
)
1685 reader_skipn(reader
, 1);
1686 cur
= reader_get_ptr(reader
);
1688 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1689 if (*cur
== quote
) reader_skipn(reader
, 1);
1691 TRACE("%s\n", debug_strval(reader
, literal
));
1695 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1696 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1697 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1699 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1702 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1705 reader_skipn(reader
, 1);
1707 start
= reader_get_cur(reader
);
1708 cur
= reader_get_ptr(reader
);
1709 while (is_pubchar(*cur
) && *cur
!= quote
)
1711 reader_skipn(reader
, 1);
1712 cur
= reader_get_ptr(reader
);
1714 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1715 if (*cur
== quote
) reader_skipn(reader
, 1);
1717 TRACE("%s\n", debug_strval(reader
, literal
));
1721 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1722 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1724 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1725 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1730 if (!reader_cmp(reader
, publicW
)) {
1734 reader_skipn(reader
, 6);
1735 cnt
= reader_skipspaces(reader
);
1736 if (!cnt
) return WC_E_WHITESPACE
;
1738 hr
= reader_parse_pub_literal(reader
, &pub
);
1739 if (FAILED(hr
)) return hr
;
1741 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1742 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
);
1743 if (FAILED(hr
)) return hr
;
1745 cnt
= reader_skipspaces(reader
);
1746 if (!cnt
) return S_OK
;
1748 /* optional system id */
1749 hr
= reader_parse_sys_literal(reader
, &sys
);
1750 if (FAILED(hr
)) return S_OK
;
1752 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1753 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
);
1754 if (FAILED(hr
)) return hr
;
1757 } else if (!reader_cmp(reader
, systemW
)) {
1759 reader_skipn(reader
, 6);
1760 cnt
= reader_skipspaces(reader
);
1761 if (!cnt
) return WC_E_WHITESPACE
;
1763 hr
= reader_parse_sys_literal(reader
, &sys
);
1764 if (FAILED(hr
)) return hr
;
1766 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1767 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
);
1773 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1774 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1776 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1781 /* check if we have "<!DOCTYPE" */
1782 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1783 reader_shrink(reader
);
1785 /* DTD processing is not allowed by default */
1786 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1788 reader_skipn(reader
, 9);
1789 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1792 hr
= reader_parse_name(reader
, &name
);
1793 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1795 reader_skipspaces(reader
);
1797 hr
= reader_parse_externalid(reader
);
1798 if (FAILED(hr
)) return hr
;
1800 reader_skipspaces(reader
);
1802 cur
= reader_get_ptr(reader
);
1805 FIXME("internal subset parsing not implemented\n");
1810 reader_skipn(reader
, 1);
1812 reader
->nodetype
= XmlNodeType_DocumentType
;
1813 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1814 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1819 /* [11 NS] LocalPart ::= NCName */
1820 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1825 if (reader
->resume
[XmlReadResume_Local
])
1827 start
= reader
->resume
[XmlReadResume_Local
];
1828 ptr
= reader_get_ptr(reader
);
1832 ptr
= reader_get_ptr(reader
);
1833 start
= reader_get_cur(reader
);
1836 while (is_ncnamechar(*ptr
))
1838 reader_skipn(reader
, 1);
1839 ptr
= reader_get_ptr(reader
);
1842 if (is_reader_pending(reader
))
1844 reader
->resume
[XmlReadResume_Local
] = start
;
1848 reader
->resume
[XmlReadResume_Local
] = 0;
1850 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1855 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1856 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1857 [9 NS] UnprefixedName ::= LocalPart
1858 [10 NS] Prefix ::= NCName */
1859 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1865 if (reader
->resume
[XmlReadResume_Name
])
1867 start
= reader
->resume
[XmlReadResume_Name
];
1868 ptr
= reader_get_ptr(reader
);
1872 ptr
= reader_get_ptr(reader
);
1873 start
= reader_get_cur(reader
);
1874 reader
->resume
[XmlReadResume_Name
] = start
;
1875 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1878 if (reader
->resume
[XmlReadResume_Local
])
1880 hr
= reader_parse_local(reader
, local
);
1881 if (FAILED(hr
)) return hr
;
1883 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1884 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1889 /* skip prefix part */
1890 while (is_ncnamechar(*ptr
))
1892 reader_skipn(reader
, 1);
1893 ptr
= reader_get_ptr(reader
);
1896 if (is_reader_pending(reader
)) return E_PENDING
;
1898 /* got a qualified name */
1901 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1904 reader_skipn(reader
, 1);
1905 hr
= reader_parse_local(reader
, local
);
1906 if (FAILED(hr
)) return hr
;
1910 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1911 reader_init_strvalue(0, 0, prefix
);
1916 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1918 TRACE("ncname %s\n", debug_strval(reader
, local
));
1920 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1922 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1925 reader
->resume
[XmlReadResume_Name
] = 0;
1926 reader
->resume
[XmlReadResume_Local
] = 0;
1931 /* Applies normalization rules to a single char, used for attribute values.
1933 Rules include 2 steps:
1935 1) replacing \r\n with a single \n;
1936 2) replacing all whitespace chars with ' '.
1939 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1941 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1943 if (!is_wchar_space(*ptr
)) return;
1945 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1947 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1948 memmove(ptr
+1, ptr
+2, len
);
1953 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1955 static const WCHAR entltW
[] = {'l','t'};
1956 static const WCHAR entgtW
[] = {'g','t'};
1957 static const WCHAR entampW
[] = {'a','m','p'};
1958 static const WCHAR entaposW
[] = {'a','p','o','s'};
1959 static const WCHAR entquotW
[] = {'q','u','o','t'};
1960 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1961 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1962 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1963 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1964 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1965 WCHAR
*str
= reader_get_strptr(reader
, name
);
1970 if (strval_eq(reader
, name
, <
)) return '<';
1973 if (strval_eq(reader
, name
, >
)) return '>';
1976 if (strval_eq(reader
, name
, &
))
1978 else if (strval_eq(reader
, name
, &apos
))
1982 if (strval_eq(reader
, name
, "
)) return '\"';
1991 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1992 [67] Reference ::= EntityRef | CharRef
1993 [68] EntityRef ::= '&' Name ';' */
1994 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1996 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1997 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1998 UINT cur
= reader_get_cur(reader
);
2003 reader_skipn(reader
, 1);
2004 ptr
= reader_get_ptr(reader
);
2008 reader_skipn(reader
, 1);
2009 ptr
= reader_get_ptr(reader
);
2011 /* hex char or decimal */
2014 reader_skipn(reader
, 1);
2015 ptr
= reader_get_ptr(reader
);
2019 if ((*ptr
>= '0' && *ptr
<= '9'))
2020 ch
= ch
*16 + *ptr
- '0';
2021 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2022 ch
= ch
*16 + *ptr
- 'a' + 10;
2023 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2024 ch
= ch
*16 + *ptr
- 'A' + 10;
2026 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2027 reader_skipn(reader
, 1);
2028 ptr
= reader_get_ptr(reader
);
2035 if ((*ptr
>= '0' && *ptr
<= '9'))
2037 ch
= ch
*10 + *ptr
- '0';
2038 reader_skipn(reader
, 1);
2039 ptr
= reader_get_ptr(reader
);
2042 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2046 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2049 if (is_wchar_space(ch
)) ch
= ' ';
2051 ptr
= reader_get_ptr(reader
);
2052 start
= reader_get_ptr2(reader
, cur
);
2053 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2054 memmove(start
+ 1, ptr
+ 1, len
);
2056 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2057 buffer
->cur
= cur
+ 1;
2066 hr
= reader_parse_name(reader
, &name
);
2067 if (FAILED(hr
)) return hr
;
2069 ptr
= reader_get_ptr(reader
);
2070 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2072 /* predefined entities resolve to a single character */
2073 ch
= get_predefined_entity(reader
, &name
);
2076 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2077 memmove(start
+1, ptr
+1, len
);
2078 buffer
->cur
= cur
+ 1;
2084 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2085 return WC_E_UNDECLAREDENTITY
;
2093 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2094 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2099 ptr
= reader_get_ptr(reader
);
2101 /* skip opening quote */
2103 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2104 reader_skipn(reader
, 1);
2106 ptr
= reader_get_ptr(reader
);
2107 start
= reader_get_cur(reader
);
2110 if (*ptr
== '<') return WC_E_LESSTHAN
;
2114 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2115 /* skip closing quote */
2116 reader_skipn(reader
, 1);
2122 HRESULT hr
= reader_parse_reference(reader
);
2123 if (FAILED(hr
)) return hr
;
2127 reader_normalize_space(reader
, ptr
);
2128 reader_skipn(reader
, 1);
2130 ptr
= reader_get_ptr(reader
);
2136 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2137 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2138 [3 NS] DefaultAttName ::= 'xmlns'
2139 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2140 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2142 strval prefix
, local
, qname
, value
;
2143 BOOL ns
= FALSE
, nsdef
= FALSE
;
2146 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2147 if (FAILED(hr
)) return hr
;
2149 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2152 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2155 hr
= reader_parse_eq(reader
);
2156 if (FAILED(hr
)) return hr
;
2158 hr
= reader_parse_attvalue(reader
, &value
);
2159 if (FAILED(hr
)) return hr
;
2162 reader_push_ns(reader
, nsdef
? &strval_xmlns
: &local
, &value
, nsdef
);
2164 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2165 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
);
2168 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2169 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2170 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2174 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2175 if (FAILED(hr
)) return hr
;
2179 static const WCHAR endW
[] = {'/','>',0};
2181 reader_skipspaces(reader
);
2184 if ((*empty
= !reader_cmp(reader
, endW
)))
2187 reader_skipn(reader
, 2);
2188 reader
->is_empty_element
= TRUE
;
2189 reader
->empty_element
.prefix
= *prefix
;
2190 reader
->empty_element
.localname
= *local
;
2191 reader
->empty_element
.qname
= *qname
;
2192 reader_mark_ns_nodes(reader
, &reader
->empty_element
);
2196 /* got a start tag */
2197 if (!reader_cmp(reader
, gtW
))
2200 reader_skipn(reader
, 1);
2201 return reader_push_element(reader
, prefix
, local
, qname
);
2204 hr
= reader_parse_attribute(reader
);
2205 if (FAILED(hr
)) return hr
;
2211 /* [39] element ::= EmptyElemTag | STag content ETag */
2212 static HRESULT
reader_parse_element(xmlreader
*reader
)
2216 switch (reader
->resumestate
)
2218 case XmlReadResumeState_Initial
:
2219 /* check if we are really on element */
2220 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2223 reader_skipn(reader
, 1);
2225 reader_shrink(reader
);
2226 reader
->resumestate
= XmlReadResumeState_STag
;
2227 case XmlReadResumeState_STag
:
2229 strval qname
, prefix
, local
;
2232 /* this handles empty elements too */
2233 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2234 if (FAILED(hr
)) return hr
;
2236 /* FIXME: need to check for defined namespace to reject invalid prefix */
2238 /* if we got empty element and stack is empty go straight to Misc */
2239 if (empty
&& list_empty(&reader
->elements
))
2240 reader
->instate
= XmlReadInState_MiscEnd
;
2242 reader
->instate
= XmlReadInState_Content
;
2244 reader
->nodetype
= XmlNodeType_Element
;
2245 reader
->resumestate
= XmlReadResumeState_Initial
;
2246 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2247 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2248 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2249 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2259 /* [13 NS] ETag ::= '</' QName S? '>' */
2260 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2262 strval prefix
, local
, qname
;
2263 struct element
*elem
;
2267 reader_skipn(reader
, 2);
2269 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2270 if (FAILED(hr
)) return hr
;
2272 reader_skipspaces(reader
);
2274 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2277 reader_skipn(reader
, 1);
2279 /* Element stack should never be empty at this point, cause we shouldn't get to
2280 content parsing if it's empty. */
2281 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2282 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2284 reader
->nodetype
= XmlNodeType_EndElement
;
2285 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2286 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2287 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2292 /* [18] CDSect ::= CDStart CData CDEnd
2293 [19] CDStart ::= '<![CDATA['
2294 [20] CData ::= (Char* - (Char* ']]>' Char*))
2295 [21] CDEnd ::= ']]>' */
2296 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2301 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2303 start
= reader
->resume
[XmlReadResume_Body
];
2304 ptr
= reader_get_ptr(reader
);
2308 /* skip markup '<![CDATA[' */
2309 reader_skipn(reader
, 9);
2310 reader_shrink(reader
);
2311 ptr
= reader_get_ptr(reader
);
2312 start
= reader_get_cur(reader
);
2313 reader
->nodetype
= XmlNodeType_CDATA
;
2314 reader
->resume
[XmlReadResume_Body
] = start
;
2315 reader
->resumestate
= XmlReadResumeState_CDATA
;
2316 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2317 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2318 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2323 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2327 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2330 reader_skipn(reader
, 3);
2331 TRACE("%s\n", debug_strval(reader
, &value
));
2333 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2334 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2335 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2336 reader
->resume
[XmlReadResume_Body
] = 0;
2337 reader
->resumestate
= XmlReadResumeState_Initial
;
2342 /* Value normalization is not fully implemented, rules are:
2344 - single '\r' -> '\n';
2345 - sequence '\r\n' -> '\n', in this case value length changes;
2347 if (*ptr
== '\r') *ptr
= '\n';
2348 reader_skipn(reader
, 1);
2356 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2357 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2362 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2364 start
= reader
->resume
[XmlReadResume_Body
];
2365 ptr
= reader_get_ptr(reader
);
2369 reader_shrink(reader
);
2370 ptr
= reader_get_ptr(reader
);
2371 start
= reader_get_cur(reader
);
2372 /* There's no text */
2373 if (!*ptr
|| *ptr
== '<') return S_OK
;
2374 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2375 reader
->resume
[XmlReadResume_Body
] = start
;
2376 reader
->resumestate
= XmlReadResumeState_CharData
;
2377 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2378 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2379 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2384 static const WCHAR ampW
[] = {'&',0};
2386 /* CDATA closing sequence ']]>' is not allowed */
2387 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2388 return WC_E_CDSECTEND
;
2390 /* Found next markup part */
2395 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2396 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2397 reader
->resume
[XmlReadResume_Body
] = 0;
2398 reader
->resumestate
= XmlReadResumeState_Initial
;
2402 /* this covers a case when text has leading whitespace chars */
2403 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2405 if (!reader_cmp(reader
, ampW
))
2406 reader_parse_reference(reader
);
2408 reader_skipn(reader
, 1);
2410 ptr
= reader_get_ptr(reader
);
2416 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2417 static HRESULT
reader_parse_content(xmlreader
*reader
)
2419 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2420 static const WCHAR etagW
[] = {'<','/',0};
2422 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2424 switch (reader
->resumestate
)
2426 case XmlReadResumeState_CDATA
:
2427 return reader_parse_cdata(reader
);
2428 case XmlReadResumeState_Comment
:
2429 return reader_parse_comment(reader
);
2430 case XmlReadResumeState_PIBody
:
2431 case XmlReadResumeState_PITarget
:
2432 return reader_parse_pi(reader
);
2433 case XmlReadResumeState_CharData
:
2434 return reader_parse_chardata(reader
);
2436 ERR("unknown resume state %d\n", reader
->resumestate
);
2440 reader_shrink(reader
);
2442 /* handle end tag here, it indicates end of content as well */
2443 if (!reader_cmp(reader
, etagW
))
2444 return reader_parse_endtag(reader
);
2446 if (!reader_cmp(reader
, commentW
))
2447 return reader_parse_comment(reader
);
2449 if (!reader_cmp(reader
, piW
))
2450 return reader_parse_pi(reader
);
2452 if (!reader_cmp(reader
, cdstartW
))
2453 return reader_parse_cdata(reader
);
2455 if (!reader_cmp(reader
, ltW
))
2456 return reader_parse_element(reader
);
2458 /* what's left must be CharData */
2459 return reader_parse_chardata(reader
);
2462 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2464 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2467 if (!is_reader_pending(reader
))
2468 reader_clear_attrs(reader
);
2470 /* When moving from EndElement or empty element, pop its own namespace definitions */
2473 case XmlNodeType_Attribute
:
2474 reader_dec_depth(reader
);
2476 case XmlNodeType_Element
:
2477 if (reader
->is_empty_element
)
2478 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2479 else if (FAILED(hr
= reader_inc_depth(reader
)))
2482 case XmlNodeType_EndElement
:
2483 reader_pop_element(reader
);
2484 reader_dec_depth(reader
);
2492 switch (reader
->instate
)
2494 /* if it's a first call for a new input we need to detect stream encoding */
2495 case XmlReadInState_Initial
:
2499 hr
= readerinput_growraw(reader
->input
);
2500 if (FAILED(hr
)) return hr
;
2502 /* try to detect encoding by BOM or data and set input code page */
2503 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2504 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2505 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2506 if (FAILED(hr
)) return hr
;
2508 /* always switch first time cause we have to put something in */
2509 readerinput_switchencoding(reader
->input
, enc
);
2511 /* parse xml declaration */
2512 hr
= reader_parse_xmldecl(reader
);
2513 if (FAILED(hr
)) return hr
;
2515 readerinput_shrinkraw(reader
->input
, -1);
2516 reader
->instate
= XmlReadInState_Misc_DTD
;
2517 if (hr
== S_OK
) return hr
;
2520 case XmlReadInState_Misc_DTD
:
2521 hr
= reader_parse_misc(reader
);
2522 if (FAILED(hr
)) return hr
;
2525 reader
->instate
= XmlReadInState_DTD
;
2529 case XmlReadInState_DTD
:
2530 hr
= reader_parse_dtd(reader
);
2531 if (FAILED(hr
)) return hr
;
2535 reader
->instate
= XmlReadInState_DTD_Misc
;
2539 reader
->instate
= XmlReadInState_Element
;
2541 case XmlReadInState_DTD_Misc
:
2542 hr
= reader_parse_misc(reader
);
2543 if (FAILED(hr
)) return hr
;
2546 reader
->instate
= XmlReadInState_Element
;
2550 case XmlReadInState_Element
:
2551 return reader_parse_element(reader
);
2552 case XmlReadInState_Content
:
2553 return reader_parse_content(reader
);
2554 case XmlReadInState_MiscEnd
:
2555 hr
= reader_parse_misc(reader
);
2556 if (FAILED(hr
)) return hr
;
2560 reader
->instate
= XmlReadInState_Eof
;
2561 reader
->state
= XmlReadState_EndOfFile
;
2562 reader
->nodetype
= XmlNodeType_None
;
2565 case XmlReadInState_Eof
:
2568 FIXME("internal state %d not handled\n", reader
->instate
);
2576 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2578 xmlreader
*This
= impl_from_IXmlReader(iface
);
2580 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2582 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2583 IsEqualGUID(riid
, &IID_IXmlReader
))
2589 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2591 return E_NOINTERFACE
;
2594 IXmlReader_AddRef(iface
);
2599 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2601 xmlreader
*This
= impl_from_IXmlReader(iface
);
2602 ULONG ref
= InterlockedIncrement(&This
->ref
);
2603 TRACE("(%p)->(%d)\n", This
, ref
);
2607 static void reader_clear_ns(xmlreader
*reader
)
2609 struct ns
*ns
, *ns2
;
2611 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2612 reader_free_strvalued(reader
, &ns
->prefix
);
2613 reader_free_strvalued(reader
, &ns
->uri
);
2614 reader_free(reader
, ns
);
2617 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2618 reader_free_strvalued(reader
, &ns
->uri
);
2619 reader_free(reader
, ns
);
2623 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2625 xmlreader
*This
= impl_from_IXmlReader(iface
);
2626 LONG ref
= InterlockedDecrement(&This
->ref
);
2628 TRACE("(%p)->(%d)\n", This
, ref
);
2632 IMalloc
*imalloc
= This
->imalloc
;
2633 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2634 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2635 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2636 reader_clear_attrs(This
);
2637 reader_clear_ns(This
);
2638 reader_clear_elements(This
);
2639 reader_free_strvalues(This
);
2640 reader_free(This
, This
);
2641 if (imalloc
) IMalloc_Release(imalloc
);
2647 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2649 xmlreader
*This
= impl_from_IXmlReader(iface
);
2650 IXmlReaderInput
*readerinput
;
2653 TRACE("(%p)->(%p)\n", This
, input
);
2657 readerinput_release_stream(This
->input
);
2658 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2662 This
->line
= This
->pos
= 0;
2663 reader_clear_elements(This
);
2665 This
->nodetype
= XmlNodeType_None
;
2666 This
->resumestate
= XmlReadResumeState_Initial
;
2667 memset(This
->resume
, 0, sizeof(This
->resume
));
2669 /* just reset current input */
2672 This
->state
= XmlReadState_Initial
;
2676 /* now try IXmlReaderInput, ISequentialStream, IStream */
2677 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2680 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2681 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2684 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2685 readerinput
, readerinput
->lpVtbl
);
2686 IUnknown_Release(readerinput
);
2692 if (hr
!= S_OK
|| !readerinput
)
2694 /* create IXmlReaderInput basing on supplied interface */
2695 hr
= CreateXmlReaderInputWithEncodingName(input
,
2696 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2697 if (hr
!= S_OK
) return hr
;
2698 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2701 /* set stream for supplied IXmlReaderInput */
2702 hr
= readerinput_query_for_stream(This
->input
);
2705 This
->state
= XmlReadState_Initial
;
2706 This
->instate
= XmlReadInState_Initial
;
2712 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2714 xmlreader
*This
= impl_from_IXmlReader(iface
);
2716 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2718 if (!value
) return E_INVALIDARG
;
2722 case XmlReaderProperty_MultiLanguage
:
2723 *value
= (LONG_PTR
)This
->mlang
;
2725 IUnknown_AddRef(This
->mlang
);
2727 case XmlReaderProperty_XmlResolver
:
2728 *value
= (LONG_PTR
)This
->resolver
;
2730 IXmlResolver_AddRef(This
->resolver
);
2732 case XmlReaderProperty_DtdProcessing
:
2733 *value
= This
->dtdmode
;
2735 case XmlReaderProperty_ReadState
:
2736 *value
= This
->state
;
2738 case XmlReaderProperty_MaxElementDepth
:
2739 *value
= This
->max_depth
;
2742 FIXME("Unimplemented property (%u)\n", property
);
2749 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2751 xmlreader
*This
= impl_from_IXmlReader(iface
);
2753 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2757 case XmlReaderProperty_MultiLanguage
:
2759 IUnknown_Release(This
->mlang
);
2760 This
->mlang
= (IUnknown
*)value
;
2762 IUnknown_AddRef(This
->mlang
);
2764 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2766 case XmlReaderProperty_XmlResolver
:
2768 IXmlResolver_Release(This
->resolver
);
2769 This
->resolver
= (IXmlResolver
*)value
;
2771 IXmlResolver_AddRef(This
->resolver
);
2773 case XmlReaderProperty_DtdProcessing
:
2774 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2775 This
->dtdmode
= value
;
2777 case XmlReaderProperty_MaxElementDepth
:
2778 This
->max_depth
= value
;
2781 FIXME("Unimplemented property (%u)\n", property
);
2788 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2790 xmlreader
*This
= impl_from_IXmlReader(iface
);
2791 XmlNodeType oldtype
= This
->nodetype
;
2794 TRACE("(%p)->(%p)\n", This
, nodetype
);
2796 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2798 hr
= reader_parse_nextnode(This
);
2799 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2800 This
->state
= XmlReadState_Interactive
;
2802 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2804 *nodetype
= This
->nodetype
;
2809 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2811 xmlreader
*This
= impl_from_IXmlReader(iface
);
2813 TRACE("(%p)->(%p)\n", This
, node_type
);
2816 return E_INVALIDARG
;
2818 *node_type
= reader_get_nodetype(This
);
2819 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2822 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2824 if (!reader
->attr_count
)
2828 reader_inc_depth(reader
);
2830 reader
->attr
= LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
);
2831 reader_set_strvalue(reader
, StringValue_Prefix
, &reader
->attr
->prefix
);
2832 reader_set_strvalue(reader
, StringValue_LocalName
, &reader
->attr
->localname
);
2833 reader_set_strvalue(reader
, StringValue_QualifiedName
, &reader
->attr
->qname
);
2834 reader_set_strvalue(reader
, StringValue_Value
, &reader
->attr
->value
);
2839 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2841 xmlreader
*This
= impl_from_IXmlReader(iface
);
2843 TRACE("(%p)\n", This
);
2845 return reader_move_to_first_attribute(This
);
2848 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2850 xmlreader
*This
= impl_from_IXmlReader(iface
);
2851 const struct list
*next
;
2853 TRACE("(%p)\n", This
);
2855 if (!This
->attr_count
) return S_FALSE
;
2858 return reader_move_to_first_attribute(This
);
2860 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2863 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2864 reader_set_strvalue(This
, StringValue_Prefix
, &This
->attr
->prefix
);
2865 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2866 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->attr
->qname
);
2867 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2870 return next
? S_OK
: S_FALSE
;
/* IXmlReader::MoveToAttributeByName - stub: the call is only reported through
   FIXME together with its arguments, no attribute lookup is performed. */
2873 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2875 LPCWSTR namespaceUri
)
2877 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2881 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2883 xmlreader
*This
= impl_from_IXmlReader(iface
);
2885 TRACE("(%p)\n", This
);
2887 if (!This
->attr_count
) return S_FALSE
;
2890 reader_dec_depth(This
);
2894 /* FIXME: support other node types with 'attributes' like DTD */
2895 if (This
->is_empty_element
) {
2896 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
2897 reader_set_strvalue(This
, StringValue_LocalName
, &This
->empty_element
.localname
);
2898 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
2901 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2903 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
2904 reader_set_strvalue(This
, StringValue_LocalName
, &element
->localname
);
2905 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
2908 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
2913 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2915 xmlreader
*This
= impl_from_IXmlReader(iface
);
2917 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2918 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2919 if (len
) *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2923 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
2925 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
2928 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
2929 if (strval_eq(reader
, prefix
, &ns
->prefix
))
2936 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
2938 if (list_empty(&reader
->nsdef
))
2941 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
/* IXmlReader::GetNamespaceUri - resolves the namespace URI of the current node.
 *
 * Attribute nodes: the reserved cases are checked first (a bare "xmlns"
 * attribute or the "xmlns" prefix, then the "xml" prefix), each of which maps
 * to a fixed, statically stored URI; other prefixes go through
 * reader_lookup_ns().
 * Element / EndElement nodes: the prefix is looked up with reader_lookup_ns(),
 * and reader_lookup_nsdef() supplies the default namespace as a fallback.
 * Any other node type is reported with FIXME.
 *
 * NOTE(review): the statements that store the lookup result into *uri / *len
 * and the return statements are not visible in this excerpt - confirm against
 * the full file before relying on the exact output for the "not found" case.
 */
2944 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
2946 xmlreader
*This
= impl_from_IXmlReader(iface
);
2947 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
2948 XmlNodeType nodetype
;
2952 TRACE("(%p %p %p)\n", iface
, uri
, len
);
2960 switch ((nodetype
= reader_get_nodetype(This
)))
2962 case XmlNodeType_Attribute
:
2964 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2965 '2','0','0','0','/','x','m','l','n','s','/',0};
2966 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2967 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2968 const strval
*local
= &This
->strvalues
[StringValue_LocalName
];
2970 /* check for reserved prefixes first */
2971 if ((strval_eq(This
, prefix
, &strval_empty
) && strval_eq(This
, local
, &strval_xmlns
)) ||
2972 strval_eq(This
, prefix
, &strval_xmlns
))
2975 *len
= sizeof(xmlns_uriW
)/sizeof(xmlns_uriW
[0]) - 1;
2977 else if (strval_eq(This
, prefix
, &strval_xml
)) {
2979 *len
= sizeof(xml_uriW
)/sizeof(xml_uriW
[0]) - 1;
2983 ns
= reader_lookup_ns(This
, prefix
);
2995 case XmlNodeType_Element
:
2996 case XmlNodeType_EndElement
:
2998 ns
= reader_lookup_ns(This
, prefix
);
3000 /* pick top default ns if any */
3002 ns
= reader_lookup_nsdef(This
);
3015 FIXME("Unhandled node type %d\n", nodetype
);
3022 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3024 xmlreader
*This
= impl_from_IXmlReader(iface
);
3026 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3027 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3028 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
3032 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
3034 xmlreader
*This
= impl_from_IXmlReader(iface
);
3036 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
3037 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
3038 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
3042 static BOOL
is_namespace_definition(xmlreader
*reader
)
3044 const strval
*local
= &reader
->strvalues
[StringValue_LocalName
];
3045 const strval
*prefix
= &reader
->strvalues
[StringValue_Prefix
];
3047 if (reader_get_nodetype(reader
) != XmlNodeType_Attribute
)
3050 return ((strval_eq(reader
, prefix
, &strval_empty
) && strval_eq(reader
, local
, &strval_xmlns
)) ||
3051 strval_eq(reader
, prefix
, &strval_xmlns
));
3054 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3056 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3057 strval
*val
= &reader
->strvalues
[StringValue_Value
];
3059 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3063 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
3068 hr
= IXmlReader_Read(iface
, &type
);
3069 if (FAILED(hr
)) return hr
;
3071 /* return if still pending, partially read values are not reported */
3072 if (is_reader_pending(reader
)) return E_PENDING
;
3077 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3078 if (!ptr
) return E_OUTOFMEMORY
;
3079 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3084 /* For namespace definition attributes return values from namespace list */
3085 if (is_namespace_definition(reader
)) {
3086 const strval
*local
= &reader
->strvalues
[StringValue_LocalName
];
3089 ns
= reader_lookup_ns(reader
, local
);
3091 ns
= reader_lookup_nsdef(reader
);
3097 if (len
) *len
= val
->len
;
3101 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3103 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3104 strval
*val
= &reader
->strvalues
[StringValue_Value
];
3107 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3109 /* Value is already allocated, chunked reads are not possible. */
3110 if (val
->str
) return S_FALSE
;
3114 len
= min(chunk_size
, val
->len
);
3115 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
3118 if (read
) *read
= len
;
/* IXmlReader::GetBaseUri - stub: the call and its arguments are only reported
   through FIXME. */
3124 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3126 UINT
*baseUri_length
)
3128 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
/* IXmlReader::IsDefault - stub: the call is only reported through FIXME. */
3132 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3134 FIXME("(%p): stub\n", iface
);
3138 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3140 xmlreader
*This
= impl_from_IXmlReader(iface
);
3141 TRACE("(%p)\n", This
);
3142 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3143 when current node is start tag of an element */
3144 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3147 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
3149 xmlreader
*This
= impl_from_IXmlReader(iface
);
3151 TRACE("(%p %p)\n", This
, lineNumber
);
3153 if (!lineNumber
) return E_INVALIDARG
;
3155 *lineNumber
= This
->line
;
3160 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
3162 xmlreader
*This
= impl_from_IXmlReader(iface
);
3164 TRACE("(%p %p)\n", This
, linePosition
);
3166 if (!linePosition
) return E_INVALIDARG
;
3168 *linePosition
= This
->pos
;
3173 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3175 xmlreader
*This
= impl_from_IXmlReader(iface
);
3177 TRACE("(%p)->(%p)\n", This
, count
);
3179 if (!count
) return E_INVALIDARG
;
3181 *count
= This
->attr_count
;
3185 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3187 xmlreader
*This
= impl_from_IXmlReader(iface
);
3188 TRACE("(%p)->(%p)\n", This
, depth
);
3189 *depth
= This
->depth
;
3193 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3195 xmlreader
*This
= impl_from_IXmlReader(iface
);
3196 TRACE("(%p)\n", iface
);
3197 return This
->state
== XmlReadState_EndOfFile
;
/* IXmlReader method table; CreateXmlReader() stores its address in the
   lpVtbl of every reader object. The initializer is positional, so entries
   are listed in interface method order. */
3200 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3202 xmlreader_QueryInterface
,
3206 xmlreader_GetProperty
,
3207 xmlreader_SetProperty
,
3209 xmlreader_GetNodeType
,
3210 xmlreader_MoveToFirstAttribute
,
3211 xmlreader_MoveToNextAttribute
,
3212 xmlreader_MoveToAttributeByName
,
3213 xmlreader_MoveToElement
,
3214 xmlreader_GetQualifiedName
,
3215 xmlreader_GetNamespaceUri
,
3216 xmlreader_GetLocalName
,
3217 xmlreader_GetPrefix
,
3219 xmlreader_ReadValueChunk
,
3220 xmlreader_GetBaseUri
,
3221 xmlreader_IsDefault
,
3222 xmlreader_IsEmptyElement
,
3223 xmlreader_GetLineNumber
,
3224 xmlreader_GetLinePosition
,
3225 xmlreader_GetAttributeCount
,
3230 /** IXmlReaderInput **/
3231 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3233 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3235 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3237 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3238 IsEqualGUID(riid
, &IID_IUnknown
))
3244 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3246 return E_NOINTERFACE
;
3249 IUnknown_AddRef(iface
);
3254 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3256 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3257 ULONG ref
= InterlockedIncrement(&This
->ref
);
3258 TRACE("(%p)->(%d)\n", This
, ref
);
3262 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3264 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3265 LONG ref
= InterlockedDecrement(&This
->ref
);
3267 TRACE("(%p)->(%d)\n", This
, ref
);
3271 IMalloc
*imalloc
= This
->imalloc
;
3272 if (This
->input
) IUnknown_Release(This
->input
);
3273 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3274 if (This
->buffer
) free_input_buffer(This
->buffer
);
3275 readerinput_free(This
, This
->baseuri
);
3276 readerinput_free(This
, This
);
3277 if (imalloc
) IMalloc_Release(imalloc
);
/* Method table of this module's IXmlReaderInput objects. xmlreader_SetInput()
   compares an input's lpVtbl against this table to recognise objects created
   here. */
3283 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3285 xmlreaderinput_QueryInterface
,
3286 xmlreaderinput_AddRef
,
3287 xmlreaderinput_Release
3290 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3295 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3297 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
3299 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
3304 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3306 reader
= heap_alloc(sizeof(*reader
));
3307 if(!reader
) return E_OUTOFMEMORY
;
3309 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3311 reader
->input
= NULL
;
3312 reader
->state
= XmlReadState_Closed
;
3313 reader
->instate
= XmlReadInState_Initial
;
3314 reader
->resumestate
= XmlReadResumeState_Initial
;
3315 reader
->dtdmode
= DtdProcessing_Prohibit
;
3316 reader
->resolver
= NULL
;
3317 reader
->mlang
= NULL
;
3318 reader
->line
= reader
->pos
= 0;
3319 reader
->imalloc
= imalloc
;
3320 if (imalloc
) IMalloc_AddRef(imalloc
);
3321 reader
->nodetype
= XmlNodeType_None
;
3322 list_init(&reader
->attrs
);
3323 reader
->attr_count
= 0;
3324 reader
->attr
= NULL
;
3325 list_init(&reader
->nsdef
);
3326 list_init(&reader
->ns
);
3327 list_init(&reader
->elements
);
3329 reader
->max_depth
= 256;
3330 reader
->is_empty_element
= FALSE
;
3331 memset(reader
->resume
, 0, sizeof(reader
->resume
));
3333 for (i
= 0; i
< StringValue_Last
; i
++)
3334 reader
->strvalues
[i
] = strval_empty
;
3336 *obj
= &reader
->IXmlReader_iface
;
3338 TRACE("returning iface %p\n", *obj
);
3343 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3348 IXmlReaderInput
**ppInput
)
3350 xmlreaderinput
*readerinput
;
3353 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3354 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3356 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3359 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3361 readerinput
= heap_alloc(sizeof(*readerinput
));
3362 if(!readerinput
) return E_OUTOFMEMORY
;
3364 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3365 readerinput
->ref
= 1;
3366 readerinput
->imalloc
= imalloc
;
3367 readerinput
->stream
= NULL
;
3368 if (imalloc
) IMalloc_AddRef(imalloc
);
3369 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3370 readerinput
->hint
= hint
;
3371 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3372 readerinput
->pending
= 0;
3374 hr
= alloc_input_buffer(readerinput
);
3377 readerinput_free(readerinput
, readerinput
->baseuri
);
3378 readerinput_free(readerinput
, readerinput
);
3379 if (imalloc
) IMalloc_Release(imalloc
);
3382 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3384 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3386 TRACE("returning iface %p\n", *ppInput
);