2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by an input problem.
55 The reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW
[] = {'\"',0};
90 static const WCHAR quoteW
[] = {'\'',0};
91 static const WCHAR ltW
[] = {'<',0};
92 static const WCHAR gtW
[] = {'>',0};
93 static const WCHAR commentW
[] = {'<','!','-','-',0};
94 static const WCHAR piW
[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype
)
98 static const char * const type_names
[] =
107 "ProcessingInstruction",
120 if (nodetype
> _XmlNodeType_Last
)
121 return wine_dbg_sprintf("unknown type=%d", nodetype
);
123 return type_names
[nodetype
];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
128 static const char * const prop_names
[] =
140 if (prop
> _XmlReaderProperty_Last
)
141 return wine_dbg_sprintf("unknown property=%d", prop
);
143 return prop_names
[prop
];
146 struct xml_encoding_data
153 static const struct xml_encoding_data xml_encoding_map
[] = {
154 { utf16W
, XmlEncoding_UTF16
, ~0 },
155 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
158 const WCHAR
*get_encoding_name(xml_encoding encoding
)
160 return xml_encoding_map
[encoding
].name
;
163 xml_encoding
get_encoding_from_codepage(UINT codepage
)
166 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
168 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
170 return XmlEncoding_Unknown
;
177 unsigned int allocated
;
178 unsigned int written
;
181 typedef struct input_buffer input_buffer
;
185 IXmlReaderInput IXmlReaderInput_iface
;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
190 xml_encoding encoding
;
193 /* stream reference set after SetInput() call from reader,
194 stored as a sequential stream, because the optimizations
195 possible with IStream aren't implemented yet */
196 ISequentialStream
*stream
;
197 input_buffer
*buffer
;
198 unsigned int pending
: 1;
201 static const struct IUnknownVtbl xmlreaderinputvtbl
;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strvalue(),
209 to set strval as a reader value use reader_set_strvalue().
213 WCHAR
*str
; /* allocated null-terminated string */
214 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start
; /* input position where value starts */
218 static WCHAR emptyW
[] = {0};
219 static const strval strval_empty
= { emptyW
};
237 IXmlReader IXmlReader_iface
;
239 xmlreaderinput
*input
;
242 XmlReaderInternalState instate
;
243 XmlReaderResumeState resumestate
;
244 XmlNodeType nodetype
;
245 DtdProcessing dtdmode
;
246 IXmlResolver
*resolver
;
247 UINT line
, pos
; /* reader position in XML stream */
248 struct list attrs
; /* attributes list for current node */
249 struct attribute
*attr
; /* current attribute */
251 struct list elements
;
252 strval strvalues
[StringValue_Last
];
256 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
261 encoded_buffer utf16
;
262 encoded_buffer encoded
;
264 xmlreaderinput
*input
;
267 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
269 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
272 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
274 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
277 /* reader memory allocation functions */
278 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
280 return m_alloc(reader
->imalloc
, len
);
283 static inline void reader_free(xmlreader
*reader
, void *mem
)
285 m_free(reader
->imalloc
, mem
);
288 /* Just return pointer from offset, no attempt to read more. */
289 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
291 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
292 return (WCHAR
*)buffer
->data
+ offset
;
295 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
297 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
300 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
304 if (src
->str
!= strval_empty
.str
)
306 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
307 if (!dest
->str
) return E_OUTOFMEMORY
;
308 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
309 dest
->str
[dest
->len
] = 0;
316 /* reader input memory allocation functions */
317 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
319 return m_alloc(input
->imalloc
, len
);
322 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
324 return m_realloc(input
->imalloc
, mem
, len
);
327 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
329 m_free(input
->imalloc
, mem
);
332 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
339 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
340 ret
= readerinput_alloc(input
, size
);
341 if (ret
) memcpy(ret
, str
, size
);
347 static void reader_clear_attrs(xmlreader
*reader
)
349 struct attribute
*attr
, *attr2
;
350 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
352 reader_free(reader
, attr
);
354 list_init(&reader
->attrs
);
355 reader
->attr_count
= 0;
359 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
360 while we are on a node with attributes */
361 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
363 struct attribute
*attr
;
365 attr
= reader_alloc(reader
, sizeof(*attr
));
366 if (!attr
) return E_OUTOFMEMORY
;
368 attr
->localname
= *localname
;
369 attr
->value
= *value
;
370 list_add_tail(&reader
->attrs
, &attr
->entry
);
371 reader
->attr_count
++;
376 /* This one frees stored string value if needed */
377 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
379 if (v
->str
!= strval_empty
.str
)
381 reader_free(reader
, v
->str
);
386 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
393 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
395 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
398 /* used to initialize from constant string */
399 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
406 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
408 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
411 static void reader_free_strvalues(xmlreader
*reader
)
414 for (type
= 0; type
< StringValue_Last
; type
++)
415 reader_free_strvalue(reader
, type
);
418 /* This helper should only be used to test if strings are the same,
419 it doesn't try to sort. */
420 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
422 if (str1
->len
!= str2
->len
) return 0;
423 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
426 static void reader_clear_elements(xmlreader
*reader
)
428 struct element
*elem
, *elem2
;
429 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
431 reader_free_strvalued(reader
, &elem
->qname
);
432 reader_free(reader
, elem
);
434 list_init(&reader
->elements
);
435 reader
->empty_element
= FALSE
;
438 static HRESULT
reader_inc_depth(xmlreader
*reader
)
440 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
444 static void reader_dec_depth(xmlreader
*reader
)
446 if (reader
->depth
> 1) reader
->depth
--;
449 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
451 struct element
*elem
;
454 elem
= reader_alloc(reader
, sizeof(*elem
));
455 if (!elem
) return E_OUTOFMEMORY
;
457 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
459 reader_free(reader
, elem
);
463 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
466 reader_free_strvalued(reader
, &elem
->qname
);
467 reader_free(reader
, elem
);
471 if (!list_empty(&reader
->elements
))
473 hr
= reader_inc_depth(reader
);
475 reader_free(reader
, elem
);
480 list_add_head(&reader
->elements
, &elem
->entry
);
481 reader
->empty_element
= FALSE
;
485 static void reader_pop_element(xmlreader
*reader
)
487 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
491 list_remove(&elem
->entry
);
492 reader_free_strvalued(reader
, &elem
->qname
);
493 reader_free_strvalued(reader
, &elem
->localname
);
494 reader_free(reader
, elem
);
495 reader_dec_depth(reader
);
499 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
500 means node value is to be determined. */
501 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
503 strval
*v
= &reader
->strvalues
[type
];
505 reader_free_strvalue(reader
, type
);
514 if (value
->str
== strval_empty
.str
)
518 if (type
== StringValue_Value
)
520 /* defer allocation for value string */
522 v
->start
= value
->start
;
527 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
528 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
529 v
->str
[value
->len
] = 0;
535 static inline int is_reader_pending(xmlreader
*reader
)
537 return reader
->input
->pending
;
540 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
542 const int initial_len
= 0x2000;
543 buffer
->data
= readerinput_alloc(input
, initial_len
);
544 if (!buffer
->data
) return E_OUTOFMEMORY
;
546 memset(buffer
->data
, 0, 4);
548 buffer
->allocated
= initial_len
;
554 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
556 readerinput_free(input
, buffer
->data
);
559 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
561 if (encoding
== XmlEncoding_Unknown
)
563 FIXME("unsupported encoding %d\n", encoding
);
567 *cp
= xml_encoding_map
[encoding
].cp
;
572 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
576 if (!name
) return XmlEncoding_Unknown
;
579 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
586 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
588 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
590 return xml_encoding_map
[n
].enc
;
598 return XmlEncoding_Unknown
;
601 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
603 input_buffer
*buffer
;
606 input
->buffer
= NULL
;
608 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
609 if (!buffer
) return E_OUTOFMEMORY
;
611 buffer
->input
= input
;
612 buffer
->code_page
= ~0; /* code page is unknown at this point */
613 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
615 readerinput_free(input
, buffer
);
619 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
621 free_encoded_buffer(input
, &buffer
->utf16
);
622 readerinput_free(input
, buffer
);
626 input
->buffer
= buffer
;
630 static void free_input_buffer(input_buffer
*buffer
)
632 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
633 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
634 readerinput_free(buffer
->input
, buffer
);
637 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
639 if (readerinput
->stream
) {
640 ISequentialStream_Release(readerinput
->stream
);
641 readerinput
->stream
= NULL
;
645 /* Queries already stored interface for IStream/ISequentialStream.
646 Interface supplied on creation will be overwritten */
647 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
651 readerinput_release_stream(readerinput
);
652 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
654 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
659 /* reads a chunk to raw buffer */
660 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
662 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
663 /* to make sure aligned length won't exceed allocated length */
664 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
668 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
669 variable width encodings like UTF-8 */
670 len
= (len
+ 3) & ~3;
671 /* try to use allocated space or grow */
672 if (buffer
->allocated
- buffer
->written
< len
)
674 buffer
->allocated
*= 2;
675 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
676 len
= buffer
->allocated
- buffer
->written
;
680 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
681 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
682 readerinput
->pending
= hr
== E_PENDING
;
683 if (FAILED(hr
)) return hr
;
684 buffer
->written
+= read
;
689 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
690 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
692 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
694 length
*= sizeof(WCHAR
);
695 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
696 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
698 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
699 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
700 buffer
->allocated
= grown_size
;
704 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
706 static const char startA
[] = {'<','?'};
707 static const char commentA
[] = {'<','!'};
708 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
709 unsigned char *ptr
= (unsigned char*)buffer
->data
;
711 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
712 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
713 /* test start byte */
716 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
717 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
718 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
719 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
723 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
725 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
726 static const WCHAR startW
[] = {'<','?'};
727 static const WCHAR commentW
[] = {'<','!'};
728 static const char utf8bom
[] = {0xef,0xbb,0xbf};
729 static const char utf16lebom
[] = {0xff,0xfe};
731 *enc
= XmlEncoding_Unknown
;
733 if (buffer
->written
<= 3)
735 HRESULT hr
= readerinput_growraw(readerinput
);
736 if (FAILED(hr
)) return hr
;
737 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
740 /* try start symbols if we have enough data to do that, input buffer should contain
741 first chunk already */
742 if (readerinput_is_utf8(readerinput
))
743 *enc
= XmlEncoding_UTF8
;
744 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
745 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
746 *enc
= XmlEncoding_UTF16
;
747 /* try with BOM now */
748 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
750 buffer
->cur
+= sizeof(utf8bom
);
751 *enc
= XmlEncoding_UTF8
;
753 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
755 buffer
->cur
+= sizeof(utf16lebom
);
756 *enc
= XmlEncoding_UTF16
;
762 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
764 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
765 int len
= buffer
->written
;
767 /* complete single byte char */
768 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
770 /* find start byte of multibyte char */
771 while (--len
&& !(buffer
->data
[len
] & 0xc0))
777 /* Returns byte length of complete char sequence for buffer code page,
778 it's relative to current buffer position which is currently used for BOM handling
780 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
782 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
785 if (readerinput
->buffer
->code_page
== CP_UTF8
)
786 len
= readerinput_get_utf8_convlen(readerinput
);
788 len
= buffer
->written
;
790 TRACE("%d\n", len
- buffer
->cur
);
791 return len
- buffer
->cur
;
794 /* It's possible that raw buffer has some leftovers from last conversion - some char
795 sequence that doesn't represent a full code point. Length argument should be calculated with
796 readerinput_get_convlen(), if it's -1 it will be calculated here. */
797 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
799 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
802 len
= readerinput_get_convlen(readerinput
);
804 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
805 /* everything below cur is lost too */
806 buffer
->written
-= len
+ buffer
->cur
;
807 /* after this point we don't need cur offset really,
808 it's used only to mark where actual data begins when first chunk is read */
812 /* note that raw buffer content is kept */
813 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
815 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
816 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
822 hr
= get_code_page(enc
, &cp
);
823 if (FAILED(hr
)) return;
825 readerinput
->buffer
->code_page
= cp
;
826 len
= readerinput_get_convlen(readerinput
);
828 TRACE("switching to cp %d\n", cp
);
830 /* just copy in this case */
831 if (enc
== XmlEncoding_UTF16
)
833 readerinput_grow(readerinput
, len
);
834 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
835 dest
->written
+= len
*sizeof(WCHAR
);
839 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
840 readerinput_grow(readerinput
, dest_len
);
841 ptr
= (WCHAR
*)dest
->data
;
842 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
844 dest
->written
+= dest_len
*sizeof(WCHAR
);
847 /* shrinks parsed data a buffer begins with */
848 static void reader_shrink(xmlreader
*reader
)
850 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
852 /* avoid to move too often using threshold shrink length */
853 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
855 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
856 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
858 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
862 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
863 It won't attempt to shrink but will grow destination buffer if needed */
864 static HRESULT
reader_more(xmlreader
*reader
)
866 xmlreaderinput
*readerinput
= reader
->input
;
867 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
868 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
869 UINT cp
= readerinput
->buffer
->code_page
;
874 /* get some raw data from stream first */
875 hr
= readerinput_growraw(readerinput
);
876 len
= readerinput_get_convlen(readerinput
);
878 /* just copy for UTF-16 case */
881 readerinput_grow(readerinput
, len
);
882 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
883 dest
->written
+= len
*sizeof(WCHAR
);
887 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
888 readerinput_grow(readerinput
, dest_len
);
889 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
890 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
892 dest
->written
+= dest_len
*sizeof(WCHAR
);
893 /* get rid of processed data */
894 readerinput_shrinkraw(readerinput
, len
);
899 static inline UINT
reader_get_cur(xmlreader
*reader
)
901 return reader
->input
->buffer
->utf16
.cur
;
904 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
906 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
907 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
908 if (!*ptr
) reader_more(reader
);
909 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
912 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
915 const WCHAR
*ptr
= reader_get_ptr(reader
);
921 ptr
= reader_get_ptr(reader
);
923 if (str
[i
] != ptr
[i
])
924 return ptr
[i
] - str
[i
];
930 /* moves cursor n WCHARs forward */
931 static void reader_skipn(xmlreader
*reader
, int n
)
933 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
934 const WCHAR
*ptr
= reader_get_ptr(reader
);
936 while (*ptr
++ && n
--)
943 static inline BOOL
is_wchar_space(WCHAR ch
)
945 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
948 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
949 static int reader_skipspaces(xmlreader
*reader
)
951 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
952 const WCHAR
*ptr
= reader_get_ptr(reader
);
953 UINT start
= reader_get_cur(reader
);
955 while (is_wchar_space(*ptr
))
959 else if (*ptr
== '\n')
968 ptr
= reader_get_ptr(reader
);
971 return reader_get_cur(reader
) - start
;
974 /* [26] VersionNum ::= '1.' [0-9]+ */
975 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
977 static const WCHAR onedotW
[] = {'1','.',0};
981 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
983 start
= reader_get_cur(reader
);
985 reader_skipn(reader
, 2);
987 ptr2
= ptr
= reader_get_ptr(reader
);
988 while (*ptr
>= '0' && *ptr
<= '9')
990 reader_skipn(reader
, 1);
991 ptr
= reader_get_ptr(reader
);
994 if (ptr2
== ptr
) return WC_E_DIGIT
;
995 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
996 TRACE("version=%s\n", debug_strval(reader
, val
));
1000 /* [25] Eq ::= S? '=' S? */
1001 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1003 static const WCHAR eqW
[] = {'=',0};
1004 reader_skipspaces(reader
);
1005 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1007 reader_skipn(reader
, 1);
1008 reader_skipspaces(reader
);
1012 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1013 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1015 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1019 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1021 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1022 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1023 /* skip 'version' */
1024 reader_skipn(reader
, 7);
1026 hr
= reader_parse_eq(reader
);
1027 if (FAILED(hr
)) return hr
;
1029 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1032 reader_skipn(reader
, 1);
1034 hr
= reader_parse_versionnum(reader
, &val
);
1035 if (FAILED(hr
)) return hr
;
1037 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1041 reader_skipn(reader
, 1);
1043 return reader_add_attr(reader
, &name
, &val
);
1046 /* ([A-Za-z0-9._] | '-') */
1047 static inline BOOL
is_wchar_encname(WCHAR ch
)
1049 return ((ch
>= 'A' && ch
<= 'Z') ||
1050 (ch
>= 'a' && ch
<= 'z') ||
1051 (ch
>= '0' && ch
<= '9') ||
1052 (ch
== '.') || (ch
== '_') ||
1056 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1057 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1059 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1063 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1064 return WC_E_ENCNAME
;
1066 val
->start
= reader_get_cur(reader
);
1069 while (is_wchar_encname(*++ptr
))
1073 enc
= parse_encoding_name(start
, len
);
1074 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1078 if (enc
== XmlEncoding_Unknown
)
1079 return WC_E_ENCNAME
;
1081 /* skip encoding name */
1082 reader_skipn(reader
, len
);
1086 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1087 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1089 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1093 if (!reader_skipspaces(reader
)) return S_FALSE
;
1095 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1096 name
.str
= reader_get_ptr(reader
);
1097 name
.start
= reader_get_cur(reader
);
1099 /* skip 'encoding' */
1100 reader_skipn(reader
, 8);
1102 hr
= reader_parse_eq(reader
);
1103 if (FAILED(hr
)) return hr
;
1105 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1108 reader_skipn(reader
, 1);
1110 hr
= reader_parse_encname(reader
, &val
);
1111 if (FAILED(hr
)) return hr
;
1113 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1117 reader_skipn(reader
, 1);
1119 return reader_add_attr(reader
, &name
, &val
);
1122 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1123 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1125 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1126 static const WCHAR yesW
[] = {'y','e','s',0};
1127 static const WCHAR noW
[] = {'n','o',0};
1132 if (!reader_skipspaces(reader
)) return S_FALSE
;
1134 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1135 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1136 /* skip 'standalone' */
1137 reader_skipn(reader
, 10);
1139 hr
= reader_parse_eq(reader
);
1140 if (FAILED(hr
)) return hr
;
1142 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1145 reader_skipn(reader
, 1);
1147 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1148 return WC_E_XMLDECL
;
1150 start
= reader_get_cur(reader
);
1151 /* skip 'yes'|'no' */
1152 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1153 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1154 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1156 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1159 reader_skipn(reader
, 1);
1161 return reader_add_attr(reader
, &name
, &val
);
1164 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1165 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1167 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1168 static const WCHAR declcloseW
[] = {'?','>',0};
1171 /* check if we have "<?xml " */
1172 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1174 reader_skipn(reader
, 5);
1175 hr
= reader_parse_versioninfo(reader
);
1179 hr
= reader_parse_encdecl(reader
);
1183 hr
= reader_parse_sddecl(reader
);
1187 reader_skipspaces(reader
);
1188 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1189 reader_skipn(reader
, 2);
1191 reader_inc_depth(reader
);
1192 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1193 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1194 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1195 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1200 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1201 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1206 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1208 start
= reader
->resume
[XmlReadResume_Body
];
1209 ptr
= reader_get_ptr(reader
);
1214 reader_skipn(reader
, 4);
1215 reader_shrink(reader
);
1216 ptr
= reader_get_ptr(reader
);
1217 start
= reader_get_cur(reader
);
1218 reader
->nodetype
= XmlNodeType_Comment
;
1219 reader
->resume
[XmlReadResume_Body
] = start
;
1220 reader
->resumestate
= XmlReadResumeState_Comment
;
1221 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1222 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1223 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1226 /* will exit when there's no more data, it won't attempt to
1227 read more from stream */
1238 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1239 TRACE("%s\n", debug_strval(reader
, &value
));
1241 /* skip rest of markup '->' */
1242 reader_skipn(reader
, 3);
1244 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1245 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1246 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1247 reader
->resume
[XmlReadResume_Body
] = 0;
1248 reader
->resumestate
= XmlReadResumeState_Initial
;
1252 return WC_E_COMMENT
;
1256 reader_skipn(reader
, 1);
1263 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1264 static inline BOOL
is_char(WCHAR ch
)
1266 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1267 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1268 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1269 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1270 (ch
>= 0xe000 && ch
<= 0xfffd);
1273 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1274 static inline BOOL
is_pubchar(WCHAR ch
)
1276 return (ch
== ' ') ||
1277 (ch
>= 'a' && ch
<= 'z') ||
1278 (ch
>= 'A' && ch
<= 'Z') ||
1279 (ch
>= '0' && ch
<= '9') ||
1280 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1281 (ch
== '=') || (ch
== '?') ||
1282 (ch
== '@') || (ch
== '!') ||
1283 (ch
>= '#' && ch
<= '%') || /* #$% */
1284 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1287 static inline BOOL
is_namestartchar(WCHAR ch
)
1289 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1290 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1291 (ch
>= 0xc0 && ch
<= 0xd6) ||
1292 (ch
>= 0xd8 && ch
<= 0xf6) ||
1293 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1294 (ch
>= 0x370 && ch
<= 0x37d) ||
1295 (ch
>= 0x37f && ch
<= 0x1fff) ||
1296 (ch
>= 0x200c && ch
<= 0x200d) ||
1297 (ch
>= 0x2070 && ch
<= 0x218f) ||
1298 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1299 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1300 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1301 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1302 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1303 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1306 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1307 static inline BOOL
is_ncnamechar(WCHAR ch
)
1309 return (ch
>= 'A' && ch
<= 'Z') ||
1310 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1311 (ch
== '-') || (ch
== '.') ||
1312 (ch
>= '0' && ch
<= '9') ||
1314 (ch
>= 0xc0 && ch
<= 0xd6) ||
1315 (ch
>= 0xd8 && ch
<= 0xf6) ||
1316 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1317 (ch
>= 0x300 && ch
<= 0x36f) ||
1318 (ch
>= 0x370 && ch
<= 0x37d) ||
1319 (ch
>= 0x37f && ch
<= 0x1fff) ||
1320 (ch
>= 0x200c && ch
<= 0x200d) ||
1321 (ch
>= 0x203f && ch
<= 0x2040) ||
1322 (ch
>= 0x2070 && ch
<= 0x218f) ||
1323 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1324 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1325 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1326 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1327 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1328 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1331 static inline BOOL
is_namechar(WCHAR ch
)
1333 return (ch
== ':') || is_ncnamechar(ch
);
1336 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1338 /* When we're on attribute always return attribute type, container node type is kept.
1339 Note that container is not necessarily an element, and attribute doesn't mean it's
1340 an attribute in XML spec terms. */
1341 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
/* Parses an XML Name at the reader's current position and describes it in
   *name as a (start offset, length) pair.
   As visible here: a non-zero reader->resume[XmlReadResume_Name] is used as
   the start offset of a previously interrupted parse; otherwise the start is
   the current position and WC_E_NAMECHARACTER is returned when the first
   character fails is_namestartchar().  Characters are consumed one at a time
   while is_namechar() accepts them.  If the reader is pending afterwards the
   start offset is saved in the resume slot; otherwise the slot is cleared and
   *name is initialised to cover start..current position.
   NOTE(review): this listing omits some lines (local declarations, braces,
   return statements) - confirm the exact return values against the full
   file. */
1344 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1345 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1346 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1347 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1348 [5] Name ::= NameStartChar (NameChar)* */
1349 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1354 if (reader
->resume
[XmlReadResume_Name
])
1356 start
= reader
->resume
[XmlReadResume_Name
];
1357 ptr
= reader_get_ptr(reader
);
1361 ptr
= reader_get_ptr(reader
);
1362 start
= reader_get_cur(reader
);
1363 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1366 while (is_namechar(*ptr
))
1368 reader_skipn(reader
, 1);
1369 ptr
= reader_get_ptr(reader
);
1372 if (is_reader_pending(reader
))
1374 reader
->resume
[XmlReadResume_Name
] = start
;
1378 reader
->resume
[XmlReadResume_Name
] = 0;
1380 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1381 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
/* Parses the target of a processing instruction using reader_parse_name().
   Visible behaviour:
   - if reader_parse_name() fails, E_PENDING is returned when the reader is
     pending, WC_E_PI otherwise;
   - a name that strval_eq() reports equal to the 3-character value "xml"
     is rejected with WC_E_LEADINGXML;
   - inside the loop over the name's characters a return of NC_E_NAMECOLON
     (index > 0) or WC_E_PI (index 0) is visible; presumably it is guarded by
     a ':' test, as the "can't be a qualified name" comment suggests, but the
     condition line is not part of this listing.
   NOTE(review): the local declarations, the assignment to *target and the
   success return are not visible here. */
1386 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1387 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1389 static const WCHAR xmlW
[] = {'x','m','l'};
1390 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1396 hr
= reader_parse_name(reader
, &name
);
1397 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1399 /* now that we got name check for illegal content */
1400 if (strval_eq(reader
, &name
, &xmlval
))
1401 return WC_E_LEADINGXML
;
1403 /* PITarget can't be a qualified name */
1404 ptr
= reader_get_strptr(reader
, &name
);
1405 for (i
= 0; i
< name
.len
; i
++)
1407 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1409 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
/* Parses a processing instruction node; the parse can be resumed through
   reader->resumestate.
   Visible steps:
   - XmlReadResumeState_Initial: skips 2 characters (presumably the opening
     markup), shrinks the buffer and moves to the PITarget state;
   - XmlReadResumeState_PITarget: parses the target, stores it as both local
     and qualified name, sets an empty value, moves to the PIBody state and
     records the body start offset in reader->resume[XmlReadResume_Body];
   - afterwards the body start is read back from the resume slot.  When the
     end of the instruction is found, leading whitespace is stripped from the
     value, 2 characters are skipped, the node type becomes
     XmlNodeType_ProcessingInstruction, the resume state and slot are reset
     and the value string is stored.  Otherwise one character is skipped.
   NOTE(review): the scanning loop, the test that detects the closing markup
   and the return statements are not visible in this listing; whether the
   case labels fall through cannot be confirmed from here. */
1414 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1415 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1422 switch (reader
->resumestate
)
1424 case XmlReadResumeState_Initial
:
1426 reader_skipn(reader
, 2);
1427 reader_shrink(reader
);
1428 reader
->resumestate
= XmlReadResumeState_PITarget
;
1429 case XmlReadResumeState_PITarget
:
1430 hr
= reader_parse_pitarget(reader
, &target
);
1431 if (FAILED(hr
)) return hr
;
1432 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1433 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1434 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1435 reader
->resumestate
= XmlReadResumeState_PIBody
;
1436 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1441 start
= reader
->resume
[XmlReadResume_Body
];
1442 ptr
= reader_get_ptr(reader
);
1449 UINT cur
= reader_get_cur(reader
);
1452 /* strip all leading whitespace chars */
1455 ptr
= reader_get_ptr2(reader
, start
);
1456 if (!is_wchar_space(*ptr
)) break;
1460 reader_init_strvalue(start
, cur
-start
, &value
);
1463 reader_skipn(reader
, 2);
1464 TRACE("%s\n", debug_strval(reader
, &value
));
1465 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1466 reader
->resumestate
= XmlReadResumeState_Initial
;
1467 reader
->resume
[XmlReadResume_Body
] = 0;
1468 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1473 reader_skipn(reader
, 1);
1474 ptr
= reader_get_ptr(reader
);
/* Parses a whitespace node; resumable through reader->resumestate.
   Visible steps:
   - XmlReadResumeState_Initial: shrinks the buffer, switches to the
     Whitespace resume state, records the start offset in
     reader->resume[XmlReadResume_Body], sets the node type to
     XmlNodeType_Whitespace and resets local name, qualified name and value
     to the empty string;
   - XmlReadResumeState_Whitespace: skips whitespace; returns S_OK early when
     the reader is pending (the resume state is left untouched in that case);
     otherwise the value is set to the range from the saved start offset to
     the current position and the resume state goes back to Initial.
   NOTE(review): local declarations, any default case and the final return
   are not visible in this listing. */
1480 /* This one is used to parse significant whitespace nodes, like in Misc production */
1481 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1483 switch (reader
->resumestate
)
1485 case XmlReadResumeState_Initial
:
1486 reader_shrink(reader
);
1487 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1488 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1489 reader
->nodetype
= XmlNodeType_Whitespace
;
1490 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1491 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1492 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1494 case XmlReadResumeState_Whitespace
:
1499 reader_skipspaces(reader
);
1500 if (is_reader_pending(reader
)) return S_OK
;
1502 start
= reader
->resume
[XmlReadResume_Body
];
1503 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1504 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1505 TRACE("%s\n", debug_strval(reader
, &value
));
1506 reader
->resumestate
= XmlReadResumeState_Initial
;
/* Parses Misc content: a comment, a processing instruction or whitespace.
   hr starts out as S_FALSE.
   Visible behaviour:
   - when a node was interrupted (resumestate is not Initial), more input is
     requested with reader_more() and the pending PI, comment or whitespace
     parser is re-entered; an unexpected resume state is reported with ERR;
   - otherwise the current input selects the parser: a whitespace character
     goes to reader_parse_whitespace(), a zero result from
     reader_cmp(reader, commentW) to reader_parse_comment(), and a zero
     result from reader_cmp(reader, piW) to reader_parse_pi();
   - any result other than S_FALSE is returned to the caller.
   NOTE(review): the enclosing loop/else structure, the fallback branch and
   the final return are not visible in this listing. */
1515 /* [27] Misc ::= Comment | PI | S */
1516 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1518 HRESULT hr
= S_FALSE
;
1520 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1522 hr
= reader_more(reader
);
1523 if (FAILED(hr
)) return hr
;
1525 /* finish current node */
1526 switch (reader
->resumestate
)
1528 case XmlReadResumeState_PITarget
:
1529 case XmlReadResumeState_PIBody
:
1530 return reader_parse_pi(reader
);
1531 case XmlReadResumeState_Comment
:
1532 return reader_parse_comment(reader
);
1533 case XmlReadResumeState_Whitespace
:
1534 return reader_parse_whitespace(reader
);
1536 ERR("unknown resume state %d\n", reader
->resumestate
);
1542 const WCHAR
*cur
= reader_get_ptr(reader
);
1544 if (is_wchar_space(*cur
))
1545 hr
= reader_parse_whitespace(reader
);
1546 else if (!reader_cmp(reader
, commentW
))
1547 hr
= reader_parse_comment(reader
);
1548 else if (!reader_cmp(reader
, piW
))
1549 hr
= reader_parse_pi(reader
);
1553 if (hr
!= S_FALSE
) return hr
;
/* Parses a quoted system literal and describes its contents in *literal.
   Visible behaviour: WC_E_QUOTE is returned unless the current character is
   a double or single quote.  After skipping one character, input is consumed
   while it satisfies is_char() and differs from `quote`; the consumed range
   (start..current position) is stored in *literal, and one more character is
   skipped when the current one equals `quote`.
   NOTE(review): the assignment to `quote` (presumably the opening quote
   character), the declaration of `start` and the final return are not
   visible in this listing.  No error is visibly raised when the closing
   quote is missing - confirm against the full file. */
1559 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1560 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1562 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1565 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1568 reader_skipn(reader
, 1);
1570 cur
= reader_get_ptr(reader
);
1571 start
= reader_get_cur(reader
);
1572 while (is_char(*cur
) && *cur
!= quote
)
1574 reader_skipn(reader
, 1);
1575 cur
= reader_get_ptr(reader
);
1577 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1578 if (*cur
== quote
) reader_skipn(reader
, 1);
1580 TRACE("%s\n", debug_strval(reader
, literal
));
/* Parses a quoted public identifier literal and describes its contents in
   *literal.
   Visible behaviour: WC_E_QUOTE is returned unless the current character is
   a double or single quote.  After skipping one character, input is consumed
   while it satisfies is_pubchar() and differs from `quote`; the consumed
   range (start..current position) is stored in *literal.
   NOTE(review): the assignment to `quote` (presumably the opening quote
   character), the handling of the closing quote and the final return are
   not visible in this listing. */
1584 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1585 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1586 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1588 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1591 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1594 reader_skipn(reader
, 1);
1596 start
= reader_get_cur(reader
);
1597 cur
= reader_get_ptr(reader
);
1598 while (is_pubchar(*cur
) && *cur
!= quote
)
1600 reader_skipn(reader
, 1);
1601 cur
= reader_get_ptr(reader
);
1604 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1605 TRACE("%s\n", debug_strval(reader
, literal
));
/* Parses an ExternalID and records it as an attribute of the current node.
   The input is compared with "SYSTEM" and "PUBLIC" using reader_cmp().
   Two visible paths share the same shape: skip the 6-character keyword,
   require at least one whitespace character (WC_E_WHITESPACE otherwise),
   parse the literal, then add an attribute whose name is the keyword itself
   via reader_add_attr():
   - the first visible path parses a public literal and names it "PUBLIC";
   - the second parses a system literal and names it "SYSTEM".
   NOTE(review): the branch structure and the result for input that matches
   neither keyword are not visible in this listing.  In the visible "PUBLIC"
   path no system literal is parsed after the public one, although the
   production asks for PubidLiteral S SystemLiteral - confirm against the
   full file. */
1609 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1610 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1612 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1613 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1618 if (reader_cmp(reader
, systemW
))
1620 if (reader_cmp(reader
, publicW
))
1627 reader_skipn(reader
, 6);
1628 cnt
= reader_skipspaces(reader
);
1629 if (!cnt
) return WC_E_WHITESPACE
;
1631 hr
= reader_parse_pub_literal(reader
, &pub
);
1632 if (FAILED(hr
)) return hr
;
1634 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1635 return reader_add_attr(reader
, &name
, &pub
);
1643 reader_skipn(reader
, 6);
1644 cnt
= reader_skipspaces(reader
);
1645 if (!cnt
) return WC_E_WHITESPACE
;
1647 hr
= reader_parse_sys_literal(reader
, &sys
);
1648 if (FAILED(hr
)) return hr
;
1650 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1651 return reader_add_attr(reader
, &name
, &sys
);
/* Parses a document type declaration.
   Visible behaviour:
   - returns S_FALSE when the input does not start with "<!DOCTYPE";
   - returns WC_E_DTDPROHIBITED when reader->dtdmode is
     DtdProcessing_Prohibit;
   - skips the 9-character keyword and requires whitespace after it
     (WC_E_WHITESPACE otherwise);
   - parses the document type name, mapping any failure to
     WC_E_DECLDOCTYPE;
   - skips optional whitespace, parses the external id (failures are
     returned as is) and skips whitespace again;
   - the internal subset is not implemented and only reported with FIXME;
   - finally one character is skipped, the node type becomes
     XmlNodeType_DocumentType and the name is stored as both local and
     qualified name.
   NOTE(review): the tests made on the character at `cur` and the return
   statements are not visible in this listing. */
1657 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1658 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1660 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1665 /* check if we have "<!DOCTYPE" */
1666 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1667 reader_shrink(reader
);
1669 /* DTD processing is not allowed by default */
1670 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1672 reader_skipn(reader
, 9);
1673 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1676 hr
= reader_parse_name(reader
, &name
);
1677 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1679 reader_skipspaces(reader
);
1681 hr
= reader_parse_externalid(reader
);
1682 if (FAILED(hr
)) return hr
;
1684 reader_skipspaces(reader
);
1686 cur
= reader_get_ptr(reader
);
1689 FIXME("internal subset parsing not implemented\n");
1694 reader_skipn(reader
, 1);
1696 reader
->nodetype
= XmlNodeType_DocumentType
;
1697 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1698 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
/* Parses the local part of a qualified name and describes it in *local.
   As visible here: a non-zero reader->resume[XmlReadResume_Local] is used as
   the start offset of an interrupted parse, otherwise the start is the
   current position.  Characters are consumed while is_ncnamechar() accepts
   them.  If the reader is pending afterwards the start offset is saved in
   the Local resume slot; otherwise the slot is cleared and *local is
   initialised to cover start..current position.
   NOTE(review): local declarations, braces and return statements are not
   visible in this listing; no check of the first character is visible
   here. */
1703 /* [11 NS] LocalPart ::= NCName */
1704 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1709 if (reader
->resume
[XmlReadResume_Local
])
1711 start
= reader
->resume
[XmlReadResume_Local
];
1712 ptr
= reader_get_ptr(reader
);
1716 ptr
= reader_get_ptr(reader
);
1717 start
= reader_get_cur(reader
);
1720 while (is_ncnamechar(*ptr
))
1722 reader_skipn(reader
, 1);
1723 ptr
= reader_get_ptr(reader
);
1726 if (is_reader_pending(reader
))
1728 reader
->resume
[XmlReadResume_Local
] = start
;
1732 reader
->resume
[XmlReadResume_Local
] = 0;
1734 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
/* Parses a QName and reports its prefix, local part and full qualified name
   through the three strval out parameters.
   Visible behaviour:
   - a non-zero reader->resume[XmlReadResume_Name] is used as the saved start
     offset; otherwise the current position is recorded in that slot and
     NC_E_QNAMECHARACTER is returned when the first character fails
     is_ncnamechar();
   - when reader->resume[XmlReadResume_Local] is set, the local part is
     resumed through reader_parse_local() and a string value starting at the
     saved name offset with length local->start - name offset - 1 is
     initialised (presumably the prefix, the -1 dropping the ':' separator;
     the destination argument is not visible);
   - otherwise NCName characters are skipped ("prefix part") and E_PENDING is
     returned when the reader is pending; for a qualified name the prefix
     covers start..current position, one character is skipped and the local
     part is parsed; for an unprefixed name the local part covers the whole
     range and the prefix is initialised to offset 0, length 0;
   - the qualified name starts at the prefix when there is one, otherwise at
     the local part, and its length is prefix length + 1 + local length, or
     just the local length without a prefix;
   - both resume slots are cleared at the end.
   NOTE(review): local declarations, the ':' test, several else branches and
   the return statements are not visible in this listing. */
1739 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1740 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1741 [9 NS] UnprefixedName ::= LocalPart
1742 [10 NS] Prefix ::= NCName */
1743 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1749 if (reader
->resume
[XmlReadResume_Name
])
1751 start
= reader
->resume
[XmlReadResume_Name
];
1752 ptr
= reader_get_ptr(reader
);
1756 ptr
= reader_get_ptr(reader
);
1757 start
= reader_get_cur(reader
);
1758 reader
->resume
[XmlReadResume_Name
] = start
;
1759 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1762 if (reader
->resume
[XmlReadResume_Local
])
1764 hr
= reader_parse_local(reader
, local
);
1765 if (FAILED(hr
)) return hr
;
1767 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1768 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1773 /* skip prefix part */
1774 while (is_ncnamechar(*ptr
))
1776 reader_skipn(reader
, 1);
1777 ptr
= reader_get_ptr(reader
);
1780 if (is_reader_pending(reader
)) return E_PENDING
;
1782 /* got a qualified name */
1785 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1788 reader_skipn(reader
, 1);
1789 hr
= reader_parse_local(reader
, local
);
1790 if (FAILED(hr
)) return hr
;
1794 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1795 reader_init_strvalue(0, 0, prefix
);
1799 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1802 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1804 TRACE("ncname %s\n", debug_strval(reader
, local
));
1806 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1808 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1811 reader
->resume
[XmlReadResume_Name
] = 0;
1812 reader
->resume
[XmlReadResume_Local
] = 0;
/* Normalizes the whitespace character at ptr, which points into the reader's
   UTF-16 buffer (reader->input->buffer->utf16).
   Visible behaviour: returns at once when *ptr is not whitespace according
   to is_wchar_space().  For a "\r\n" pair the data following the pair is
   moved one WCHAR towards the start of the buffer with
   memmove(ptr+1, ptr+2, len), where len is the number of bytes written to
   the buffer minus the byte offset of ptr minus two WCHARs.
   NOTE(review): the lines that rewrite *ptr itself, and any update of the
   buffer's written size, are not visible in this listing; the closing line
   of the original header comment below is also absent from this view. */
1817 /* Applies normalization rules to a single char, used for attribute values.
1819 Rules include 2 steps:
1821 1) replacing \r\n with a single \n;
1822 2) replacing all whitespace chars with ' '.
1825 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1827 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1829 if (!is_wchar_space(*ptr
)) return;
1831 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1833 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1834 memmove(ptr
+1, ptr
+2, len
);
/* Maps the name of a predefined entity to the character it stands for.
   The name is compared with strval_eq() against the fixed names "lt", "gt",
   "amp", "apos" and "quot".  The visible return lines yield '<', '>' and the
   double quote for the first, second and last of these.
   NOTE(review): the return lines for "amp" and "apos", the use made of `str`
   (presumably a dispatch on the first character) and the result for an
   unknown name are not visible in this listing.  Some argument tokens below
   appear in HTML-decoded form in this view. */
1839 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1841 static const WCHAR entltW
[] = {'l','t'};
1842 static const WCHAR entgtW
[] = {'g','t'};
1843 static const WCHAR entampW
[] = {'a','m','p'};
1844 static const WCHAR entaposW
[] = {'a','p','o','s'};
1845 static const WCHAR entquotW
[] = {'q','u','o','t'};
1846 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1847 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1848 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1849 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1850 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1851 WCHAR
*str
= reader_get_strptr(reader
, name
);
1856 if (strval_eq(reader
, name
, <
)) return '<';
1859 if (strval_eq(reader
, name
, >
)) return '>';
1862 if (strval_eq(reader
, name
, &
))
1864 else if (strval_eq(reader
, name
, &apos
))
1868 if (strval_eq(reader
, name
, "
)) return '\"';
/* Parses a character or entity reference at the current position and
   collapses it in place inside the reader's UTF-16 buffer.
   `start` and `cur` remember the pointer and offset of the reference's first
   character.
   Visible behaviour for character references:
   - hexadecimal digits accumulate as ch = ch*16 + digit, decimal digits as
     ch = ch*10 + digit;
   - a visible error return yields WC_E_SEMICOLON when a value has already
     been accumulated and WC_E_HEXDIGIT / WC_E_DIGIT otherwise (presumably
     for a character that is neither a digit nor the terminator);
   - the resulting value must satisfy is_char(), else WC_E_XMLCHARACTER;
   - a whitespace result is replaced by ' ';
   - the data after the reference is moved down with
     memmove(start+1, ptr+1, len) and buffer->cur is set to cur + 1.
   Visible behaviour for entity references:
   - the name is parsed with reader_parse_name() and must be followed by ';'
     (WC_E_SEMICOLON otherwise);
   - get_predefined_entity() supplies the replacement character and the same
     memmove / cursor update is applied;
   - an unknown entity is reported with FIXME and WC_E_UNDECLAREDENTITY.
   NOTE(review): the tests that select between the forms, the loops, the
   store of the replacement character and any update of buffer->written are
   not visible in this listing.  The type of `ch` is not visible either, so
   the behaviour for values that do not fit in it should be checked. */
1877 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1878 [67] Reference ::= EntityRef | CharRef
1879 [68] EntityRef ::= '&' Name ';' */
1880 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1882 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1883 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1884 UINT cur
= reader_get_cur(reader
);
1889 reader_skipn(reader
, 1);
1890 ptr
= reader_get_ptr(reader
);
1894 reader_skipn(reader
, 1);
1895 ptr
= reader_get_ptr(reader
);
1897 /* hex char or decimal */
1900 reader_skipn(reader
, 1);
1901 ptr
= reader_get_ptr(reader
);
1905 if ((*ptr
>= '0' && *ptr
<= '9'))
1906 ch
= ch
*16 + *ptr
- '0';
1907 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1908 ch
= ch
*16 + *ptr
- 'a' + 10;
1909 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1910 ch
= ch
*16 + *ptr
- 'A' + 10;
1912 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1913 reader_skipn(reader
, 1);
1914 ptr
= reader_get_ptr(reader
);
1921 if ((*ptr
>= '0' && *ptr
<= '9'))
1923 ch
= ch
*10 + *ptr
- '0';
1924 reader_skipn(reader
, 1);
1925 ptr
= reader_get_ptr(reader
);
1928 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1932 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1935 if (is_wchar_space(ch
)) ch
= ' ';
1937 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1938 memmove(start
+1, ptr
+1, len
);
1939 buffer
->cur
= cur
+ 1;
1948 hr
= reader_parse_name(reader
, &name
);
1949 if (FAILED(hr
)) return hr
;
1951 ptr
= reader_get_ptr(reader
);
1952 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1954 /* predefined entities resolve to a single character */
1955 ch
= get_predefined_entity(reader
, &name
);
1958 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1959 memmove(start
+1, ptr
+1, len
);
1960 buffer
->cur
= cur
+ 1;
1966 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1967 return WC_E_UNDECLAREDENTITY
;
/* Parses a quoted attribute value and describes its contents in *value.
   Visible behaviour:
   - the opening quote must be a double or single quote, otherwise
     WC_E_QUOTE is returned; it is skipped and the value start is recorded;
   - a '<' inside the value yields WC_E_LESSTHAN;
   - at the end of the value the range start..current position is stored in
     *value and the closing quote is skipped;
   - references are handled by reader_parse_reference(), whose failures are
     returned as is;
   - any other character goes through reader_normalize_space() and is then
     skipped.
   NOTE(review): the assignment to `quote`, the loop and the conditions that
   select between these branches, and the return statements are not visible
   in this listing. */
1975 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1976 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1981 ptr
= reader_get_ptr(reader
);
1983 /* skip opening quote */
1985 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1986 reader_skipn(reader
, 1);
1988 ptr
= reader_get_ptr(reader
);
1989 start
= reader_get_cur(reader
);
1992 if (*ptr
== '<') return WC_E_LESSTHAN
;
1996 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
1997 /* skip closing quote */
1998 reader_skipn(reader
, 1);
2004 HRESULT hr
= reader_parse_reference(reader
);
2005 if (FAILED(hr
)) return hr
;
2009 reader_normalize_space(reader
, ptr
);
2010 reader_skipn(reader
, 1);
2012 ptr
= reader_get_ptr(reader
);
/* Parses one attribute (name, '=', quoted value) and appends it to the
   reader's attribute list.
   Visible behaviour: the name is parsed with reader_parse_qname(); a prefix
   equal to "xmlns" or a qualified name equal to "xmlns" is only reported
   with FIXME, since namespace definitions are not supported; then
   reader_parse_eq() and reader_parse_attvalue() are called, each failure
   being returned as is.  The attribute is added with reader_add_attr()
   using the local name and the value, so the prefix is not stored.
   NOTE(review): the declaration of hr and the braces around the namespace
   checks are not visible in this listing. */
2018 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2019 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2020 [3 NS] DefaultAttName ::= 'xmlns'
2021 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2022 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2024 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
2025 strval prefix
, local
, qname
, xmlns
, value
;
2028 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2029 if (FAILED(hr
)) return hr
;
2031 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2033 if (strval_eq(reader
, &prefix
, &xmlns
))
2035 FIXME("namespace definitions not supported\n");
2039 if (strval_eq(reader
, &qname
, &xmlns
))
2040 FIXME("default namespace definitions not supported\n");
2042 hr
= reader_parse_eq(reader
);
2043 if (FAILED(hr
)) return hr
;
2045 hr
= reader_parse_attvalue(reader
, &value
);
2046 if (FAILED(hr
)) return hr
;
2048 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2049 return reader_add_attr(reader
, &local
, &value
);
/* Parses the remainder of a start tag or empty-element tag: the element
   name followed by attributes up to the closing markup.
   Visible behaviour:
   - the name is parsed with reader_parse_qname() into prefix/local/qname;
   - whitespace is skipped before each item;
   - if the input matches "/>" (zero result from reader_cmp()), *empty is set
     to non-zero, 2 characters are skipped and reader->empty_element is set
     to TRUE;
   - if the input matches gtW, 1 character is skipped and the element is
     pushed with reader_push_element(reader, qname, local), whose result is
     returned;
   - otherwise an attribute is parsed, failures being returned as is.
   NOTE(review): the enclosing loop, the declaration of hr and the return on
   the empty-element path are not visible in this listing. */
2052 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2053 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2054 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2058 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2059 if (FAILED(hr
)) return hr
;
2063 static const WCHAR endW
[] = {'/','>',0};
2065 reader_skipspaces(reader
);
2068 if ((*empty
= !reader_cmp(reader
, endW
)))
2071 reader_skipn(reader
, 2);
2072 reader
->empty_element
= TRUE
;
2076 /* got a start tag */
2077 if (!reader_cmp(reader
, gtW
))
2080 reader_skipn(reader
, 1);
2081 return reader_push_element(reader
, qname
, local
);
2084 hr
= reader_parse_attribute(reader
);
2085 if (FAILED(hr
)) return hr
;
/* Parses an element start (start tag or empty-element tag); resumable
   through reader->resumestate.
   Visible behaviour:
   - XmlReadResumeState_Initial: returns S_FALSE when the input does not
     match ltW; otherwise skips 1 character, shrinks the buffer and moves to
     the STag resume state;
   - XmlReadResumeState_STag: parses the tag with reader_parse_stag(),
     returning failures as is; any non-empty prefix is rejected with
     NC_E_UNDECLAREDPREFIX because namespaces are not tracked yet;
   - an empty element with an empty element stack moves the reader to
     XmlReadInState_MiscEnd; an assignment of XmlReadInState_Content is also
     visible, presumably in the else branch;
   - the node type becomes XmlNodeType_Element, the resume state is reset
     and local name, prefix and qualified name are stored.
   NOTE(review): local declarations (hr, empty), the else keyword, any
   default case and the return statements are not visible in this listing. */
2091 /* [39] element ::= EmptyElemTag | STag content ETag */
2092 static HRESULT
reader_parse_element(xmlreader
*reader
)
2096 switch (reader
->resumestate
)
2098 case XmlReadResumeState_Initial
:
2099 /* check if we are really on element */
2100 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2103 reader_skipn(reader
, 1);
2105 reader_shrink(reader
);
2106 reader
->resumestate
= XmlReadResumeState_STag
;
2107 case XmlReadResumeState_STag
:
2109 strval qname
, prefix
, local
;
2112 /* this handles empty elements too */
2113 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2114 if (FAILED(hr
)) return hr
;
2116 /* FIXME: need to check for defined namespace to reject invalid prefix,
2117 currently reject all prefixes */
2118 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2120 /* if we got empty element and stack is empty go straight to Misc */
2121 if (empty
&& list_empty(&reader
->elements
))
2122 reader
->instate
= XmlReadInState_MiscEnd
;
2124 reader
->instate
= XmlReadInState_Content
;
2126 reader
->nodetype
= XmlNodeType_Element
;
2127 reader
->resumestate
= XmlReadResumeState_Initial
;
2128 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2129 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2130 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
/* Parses an end tag and pops the matching element from the element stack.
   Visible behaviour:
   - skips 2 characters (the "</" the caller matched), parses the name with
     reader_parse_qname() and skips optional whitespace;
   - returns WC_E_GREATERTHAN when the input does not match gtW, otherwise
     skips 1 character;
   - compares the qualified name with that of the element at the head of
     reader->elements and returns WC_E_ELEMENTMATCH on a mismatch;
   - pops the element; when the stack becomes empty the reader moves to
     XmlReadInState_MiscEnd, since the root element has just been closed;
   - the node type becomes XmlNodeType_EndElement and local/qualified name
     are stored.
   The head of the list is used without an emptiness check; per the comment
   below, content parsing is not expected to be reached with an empty stack.
   NOTE(review): the declaration of hr and the final return are not visible
   in this listing. */
2140 /* [13 NS] ETag ::= '</' QName S? '>' */
2141 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2143 strval prefix
, local
, qname
;
2144 struct element
*elem
;
2148 reader_skipn(reader
, 2);
2150 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2151 if (FAILED(hr
)) return hr
;
2153 reader_skipspaces(reader
);
2155 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2158 reader_skipn(reader
, 1);
2160 /* Element stack should never be empty at this point, cause we shouldn't get to
2161 content parsing if it's empty. */
2162 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2163 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2165 reader_pop_element(reader
);
2167 /* It was a root element, the rest is expected as Misc */
2168 if (list_empty(&reader
->elements
))
2169 reader
->instate
= XmlReadInState_MiscEnd
;
2171 reader
->nodetype
= XmlNodeType_EndElement
;
2172 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2173 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
/* Parses a CDATA section; resumable through reader->resumestate.
   Visible behaviour:
   - when resuming (XmlReadResumeState_CDATA) the start offset is read back
     from reader->resume[XmlReadResume_Body];
   - on a fresh parse the 9-character opening markup is skipped, the buffer
     is shrunk, the node type becomes XmlNodeType_CDATA, the start offset is
     saved in the resume slot, the resume state is set to CDATA and the three
     string values are set with a NULL argument;
   - when the three characters at ptr are "]]>", the value covers
     start..current position, 3 characters are skipped, local and qualified
     name are set to the empty string, the value is stored and the resume
     slot and state are reset;
   - otherwise a '\r' is rewritten to '\n' and one character is skipped.
   NOTE(review): the loop, the else branches and the return statements are
   not visible in this listing.  ptr[1] and ptr[2] are read without a visible
   bounds check - presumably the buffer is terminated; confirm.  The closing
   line of the "Value normalization" comment near the end is also absent
   from this view. */
2178 /* [18] CDSect ::= CDStart CData CDEnd
2179 [19] CDStart ::= '<![CDATA['
2180 [20] CData ::= (Char* - (Char* ']]>' Char*))
2181 [21] CDEnd ::= ']]>' */
2182 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2187 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2189 start
= reader
->resume
[XmlReadResume_Body
];
2190 ptr
= reader_get_ptr(reader
);
2194 /* skip markup '<![CDATA[' */
2195 reader_skipn(reader
, 9);
2196 reader_shrink(reader
);
2197 ptr
= reader_get_ptr(reader
);
2198 start
= reader_get_cur(reader
);
2199 reader
->nodetype
= XmlNodeType_CDATA
;
2200 reader
->resume
[XmlReadResume_Body
] = start
;
2201 reader
->resumestate
= XmlReadResumeState_CDATA
;
2202 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2203 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2204 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2209 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2213 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2216 reader_skipn(reader
, 3);
2217 TRACE("%s\n", debug_strval(reader
, &value
));
2219 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2220 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2221 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2222 reader
->resume
[XmlReadResume_Body
] = 0;
2223 reader
->resumestate
= XmlReadResumeState_Initial
;
2228 /* Value normalization is not fully implemented, rules are:
2230 - single '\r' -> '\n';
2231 - sequence '\r\n' -> '\n', in this case value length changes;
2233 if (*ptr
== '\r') *ptr
= '\n';
2234 reader_skipn(reader
, 1);
/* Parses character data (a text or whitespace node); resumable through
   reader->resumestate.
   Visible behaviour:
   - when resuming (XmlReadResumeState_CharData) the start offset is read
     back from reader->resume[XmlReadResume_Body];
   - on a fresh parse the buffer is shrunk and S_OK is returned at once when
     the input is at a terminating zero or at '<' ("no text"); otherwise the
     node type starts as XmlNodeType_Whitespace when the first character is
     whitespace and XmlNodeType_Text when it is not, the start offset and
     resume state are saved, local and qualified name are emptied and the
     value is set with a NULL argument;
   - the sequence "]]>" inside character data yields WC_E_CDSECTEND;
   - when the next markup is found the value covers start..current position
     and the resume slot and state are reset;
   - otherwise one character is skipped, and a non-whitespace character
     switches the node type to XmlNodeType_Text, which covers text with
     leading whitespace.
   NOTE(review): the loop, the test that detects the next markup and the
   return statements are not visible in this listing. */
2242 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2243 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2248 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2250 start
= reader
->resume
[XmlReadResume_Body
];
2251 ptr
= reader_get_ptr(reader
);
2255 reader_shrink(reader
);
2256 ptr
= reader_get_ptr(reader
);
2257 start
= reader_get_cur(reader
);
2258 /* There's no text */
2259 if (!*ptr
|| *ptr
== '<') return S_OK
;
2260 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2261 reader
->resume
[XmlReadResume_Body
] = start
;
2262 reader
->resumestate
= XmlReadResumeState_CharData
;
2263 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2264 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2265 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2270 /* CDATA closing sequence ']]>' is not allowed */
2271 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2272 return WC_E_CDSECTEND
;
2274 /* Found next markup part */
2279 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2280 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2281 reader
->resume
[XmlReadResume_Body
] = 0;
2282 reader
->resumestate
= XmlReadResumeState_Initial
;
2286 reader_skipn(reader
, 1);
2288 /* this covers a case when text has leading whitespace chars */
2289 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
/* Parses the next item of element content.
   If a node was interrupted (resumestate is not Initial) the matching parser
   is re-entered: CDATA, comment, PI (for both PIBody and PITarget) or
   character data; an unexpected state is reported with ERR.
   Otherwise the buffer is shrunk and the input prefix selects the parser;
   reader_cmp() returning zero counts as a match.  The order matters because
   several markers start with '<': "</" (end tag, which also ends the
   content), commentW, piW, "<![CDATA[", "&" (reference) and finally ltW
   (element).  Anything else is treated as character data.
   NOTE(review): any default label and what follows the ERR call are not
   visible in this listing. */
2296 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2297 static HRESULT
reader_parse_content(xmlreader
*reader
)
2299 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2300 static const WCHAR etagW
[] = {'<','/',0};
2301 static const WCHAR ampW
[] = {'&',0};
2303 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2305 switch (reader
->resumestate
)
2307 case XmlReadResumeState_CDATA
:
2308 return reader_parse_cdata(reader
);
2309 case XmlReadResumeState_Comment
:
2310 return reader_parse_comment(reader
);
2311 case XmlReadResumeState_PIBody
:
2312 case XmlReadResumeState_PITarget
:
2313 return reader_parse_pi(reader
);
2314 case XmlReadResumeState_CharData
:
2315 return reader_parse_chardata(reader
);
2317 ERR("unknown resume state %d\n", reader
->resumestate
);
2321 reader_shrink(reader
);
2323 /* handle end tag here, it indicates end of content as well */
2324 if (!reader_cmp(reader
, etagW
))
2325 return reader_parse_endtag(reader
);
2327 if (!reader_cmp(reader
, commentW
))
2328 return reader_parse_comment(reader
);
2330 if (!reader_cmp(reader
, piW
))
2331 return reader_parse_pi(reader
);
2333 if (!reader_cmp(reader
, cdstartW
))
2334 return reader_parse_cdata(reader
);
2336 if (!reader_cmp(reader
, ampW
))
2337 return reader_parse_reference(reader
);
2339 if (!reader_cmp(reader
, ltW
))
2340 return reader_parse_element(reader
);
2342 /* what's left must be CharData */
2343 return reader_parse_chardata(reader
);
/* Parses the next node by driving the document-level state machine held in
   reader->instate.
   Attributes of the previous node are cleared first, unless the reader is
   pending.
   Visible per-state work:
   - Initial: grows the raw input, detects the encoding, switches the input
     to it, parses the XML declaration, shrinks the raw buffer and moves to
     Misc_DTD; the result is returned when the declaration parse gave S_OK;
   - Misc_DTD, DTD_Misc and MiscEnd: call reader_parse_misc() and return its
     failures; assignments of the following state (DTD, Element and Eof
     respectively) are visible, but the conditions guarding them are not;
   - DTD: calls reader_parse_dtd(); assignments of both DTD_Misc and Element
     are visible, presumably depending on whether a DTD was found;
   - Element: returns reader_parse_element();
   - Content: returns reader_parse_content();
   - any other state is reported with FIXME.
   NOTE(review): the enclosing loop, local declarations, break/return lines
   between the cases and the Eof result are not visible in this listing. */
2346 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2350 if (!is_reader_pending(reader
))
2351 reader_clear_attrs(reader
);
2355 switch (reader
->instate
)
2357 /* if it's a first call for a new input we need to detect stream encoding */
2358 case XmlReadInState_Initial
:
2362 hr
= readerinput_growraw(reader
->input
);
2363 if (FAILED(hr
)) return hr
;
2365 /* try to detect encoding by BOM or data and set input code page */
2366 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2367 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2368 if (FAILED(hr
)) return hr
;
2370 /* always switch first time cause we have to put something in */
2371 readerinput_switchencoding(reader
->input
, enc
);
2373 /* parse xml declaration */
2374 hr
= reader_parse_xmldecl(reader
);
2375 if (FAILED(hr
)) return hr
;
2377 readerinput_shrinkraw(reader
->input
, -1);
2378 reader
->instate
= XmlReadInState_Misc_DTD
;
2379 if (hr
== S_OK
) return hr
;
2382 case XmlReadInState_Misc_DTD
:
2383 hr
= reader_parse_misc(reader
);
2384 if (FAILED(hr
)) return hr
;
2387 reader
->instate
= XmlReadInState_DTD
;
2391 case XmlReadInState_DTD
:
2392 hr
= reader_parse_dtd(reader
);
2393 if (FAILED(hr
)) return hr
;
2397 reader
->instate
= XmlReadInState_DTD_Misc
;
2401 reader
->instate
= XmlReadInState_Element
;
2403 case XmlReadInState_DTD_Misc
:
2404 hr
= reader_parse_misc(reader
);
2405 if (FAILED(hr
)) return hr
;
2408 reader
->instate
= XmlReadInState_Element
;
2412 case XmlReadInState_Element
:
2413 return reader_parse_element(reader
);
2414 case XmlReadInState_Content
:
2415 return reader_parse_content(reader
);
2416 case XmlReadInState_MiscEnd
:
2417 hr
= reader_parse_misc(reader
);
2418 if (FAILED(hr
)) return hr
;
2421 reader
->instate
= XmlReadInState_Eof
;
2423 case XmlReadInState_Eof
:
2426 FIXME("internal state %d not handled\n", reader
->instate
);
/* IUnknown::QueryInterface for the reader object.
   Visible behaviour: IID_IUnknown and IID_IXmlReader are accepted; any other
   interface is reported with FIXME and answered with E_NOINTERFACE.  On
   success a reference is added through IXmlReader_AddRef().
   NOTE(review): the stores to *ppvObject and the success return are not
   visible in this listing. */
2434 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2436 xmlreader
*This
= impl_from_IXmlReader(iface
);
2438 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2440 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2441 IsEqualGUID(riid
, &IID_IXmlReader
))
2447 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2449 return E_NOINTERFACE
;
2452 IXmlReader_AddRef(iface
);
/* IUnknown::AddRef for the reader object: atomically increments This->ref
   and traces the new count.
   NOTE(review): the return statement is not visible in this listing;
   presumably the new count is returned. */
2457 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2459 xmlreader
*This
= impl_from_IXmlReader(iface
);
2460 ULONG ref
= InterlockedIncrement(&This
->ref
);
2461 TRACE("(%p)->(%d)\n", This
, ref
);
/* IUnknown::Release for the reader object: atomically decrements This->ref
   and traces the new count.
   The visible teardown sequence releases the input and the resolver when
   they are set, clears attributes and elements, frees the string values and
   frees the object itself with reader_free().  The allocator pointer is
   copied to a local first because it is released after the object has been
   freed.
   NOTE(review): the condition guarding the teardown (presumably a zero
   count) and the return statement are not visible in this listing. */
2465 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2467 xmlreader
*This
= impl_from_IXmlReader(iface
);
2468 LONG ref
= InterlockedDecrement(&This
->ref
);
2470 TRACE("(%p)->(%d)\n", This
, ref
);
2474 IMalloc
*imalloc
= This
->imalloc
;
2475 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2476 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2477 reader_clear_attrs(This
);
2478 reader_clear_elements(This
);
2479 reader_free_strvalues(This
);
2480 reader_free(This
, This
);
2481 if (imalloc
) IMalloc_Release(imalloc
);
/* IXmlReader::SetInput: attaches a new input to the reader or resets the
   current one.
   Visible behaviour:
   - the stream of the current input is released and the input itself is
     released;
   - line and position are reset to 0, the element stack is cleared, and the
     resume state and resume offsets are reset;
   - a "just reset current input" path sets the read state to
     XmlReadState_Initial;
   - the supplied object is queried for IID_IXmlReaderInput; an object using
     this module's own vtable is used directly, while an external
     implementation is reported with ERR and released;
   - when the query did not give S_OK or gave no object, a reader input is
     created around the supplied interface with
     CreateXmlReaderInputWithEncodingName(), a failure being returned as is;
   - the stream is then obtained with readerinput_query_for_stream() and
     both the read state and the internal state are set to their Initial
     values.
   NOTE(review): the conditions guarding the release and reset paths, what
   happens to This->input after an external implementation is rejected, the
   handling of the readerinput_query_for_stream() result and the return
   statements are not visible in this listing. */
2487 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2489 xmlreader
*This
= impl_from_IXmlReader(iface
);
2490 IXmlReaderInput
*readerinput
;
2493 TRACE("(%p)->(%p)\n", This
, input
);
2497 readerinput_release_stream(This
->input
);
2498 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2502 This
->line
= This
->pos
= 0;
2503 reader_clear_elements(This
);
2505 This
->resumestate
= XmlReadResumeState_Initial
;
2506 memset(This
->resume
, 0, sizeof(This
->resume
));
2508 /* just reset current input */
2511 This
->state
= XmlReadState_Initial
;
2515 /* now try IXmlReaderInput, ISequentialStream, IStream */
2516 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2519 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2520 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2523 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2524 readerinput
, readerinput
->lpVtbl
);
2525 IUnknown_Release(readerinput
);
2531 if (hr
!= S_OK
|| !readerinput
)
2533 /* create IXmlReaderInput basing on supplied interface */
2534 hr
= CreateXmlReaderInputWithEncodingName(input
,
2535 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2536 if (hr
!= S_OK
) return hr
;
2537 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2540 /* set stream for supplied IXmlReaderInput */
2541 hr
= readerinput_query_for_stream(This
->input
);
2544 This
->state
= XmlReadState_Initial
;
2545 This
->instate
= XmlReadInState_Initial
;
/* IXmlReader::GetProperty: reads one reader property into *value.
   Visible behaviour: a NULL value pointer yields E_INVALIDARG.
   - XmlReaderProperty_XmlResolver stores the resolver pointer and adds a
     reference to it;
   - XmlReaderProperty_DtdProcessing stores This->dtdmode;
   - XmlReaderProperty_ReadState stores This->state;
   - any other property is reported with FIXME.
   NOTE(review): the switch statement, the guard around the AddRef call
   (presumably a NULL check on the resolver) and the return statements are
   not visible in this listing. */
2551 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2553 xmlreader
*This
= impl_from_IXmlReader(iface
);
2555 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2557 if (!value
) return E_INVALIDARG
;
2561 case XmlReaderProperty_XmlResolver
:
2562 *value
= (LONG_PTR
)This
->resolver
;
2564 IXmlResolver_AddRef(This
->resolver
);
2566 case XmlReaderProperty_DtdProcessing
:
2567 *value
= This
->dtdmode
;
2569 case XmlReaderProperty_ReadState
:
2570 *value
= This
->state
;
2573 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::SetProperty: changes one reader property.
   Visible behaviour:
   - XmlReaderProperty_XmlResolver releases the current resolver, stores the
     new pointer and adds a reference to it;
   - XmlReaderProperty_DtdProcessing rejects values below 0 or above
     _DtdProcessing_Last with E_INVALIDARG and stores the value in
     This->dtdmode otherwise;
   - any other property is reported with FIXME.
   NOTE(review): the switch statement, the guards around the Release and
   AddRef calls (presumably NULL checks) and the return statements are not
   visible in this listing. */
2580 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2582 xmlreader
*This
= impl_from_IXmlReader(iface
);
2584 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2588 case XmlReaderProperty_XmlResolver
:
2590 IXmlResolver_Release(This
->resolver
);
2591 This
->resolver
= (IXmlResolver
*)value
;
2593 IXmlResolver_AddRef(This
->resolver
);
2595 case XmlReaderProperty_DtdProcessing
:
2596 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2597 This
->dtdmode
= value
;
2600 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::Read: advances the reader to the next node.
   Visible behaviour: returns S_FALSE at once when the reader state is
   XmlReadState_Closed.  Otherwise reader_parse_nextnode() is called; if the
   node type was XmlNodeType_None before the call and differs afterwards,
   the read state becomes XmlReadState_Interactive.  The resulting node type
   is traced and stored in *nodetype.
   NOTE(review): the condition guarding the trace and the store to
   *nodetype (including whether a NULL nodetype is tolerated), and the
   return statement, are not visible in this listing. */
2607 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2609 xmlreader
*This
= impl_from_IXmlReader(iface
);
2610 XmlNodeType oldtype
= This
->nodetype
;
2613 TRACE("(%p)->(%p)\n", This
, nodetype
);
2615 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2617 hr
= reader_parse_nextnode(This
);
2618 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2619 This
->state
= XmlReadState_Interactive
;
2622 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2623 *nodetype
= This
->nodetype
;
2629 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2631 xmlreader
*This
= impl_from_IXmlReader(iface
);
2632 TRACE("(%p)->(%p)\n", This
, node_type
);
2634 *node_type
= reader_get_nodetype(This
);
2635 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2638 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2640 xmlreader
*This
= impl_from_IXmlReader(iface
);
2642 TRACE("(%p)\n", This
);
2644 if (!This
->attr_count
) return S_FALSE
;
2645 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2646 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2647 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2652 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2654 xmlreader
*This
= impl_from_IXmlReader(iface
);
2655 const struct list
*next
;
2657 TRACE("(%p)\n", This
);
2659 if (!This
->attr_count
) return S_FALSE
;
2662 return IXmlReader_MoveToFirstAttribute(iface
);
2664 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2667 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2668 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2669 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2672 return next
? S_OK
: S_FALSE
;
2675 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2677 LPCWSTR namespaceUri
)
2679 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2683 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2685 xmlreader
*This
= impl_from_IXmlReader(iface
);
2686 struct element
*elem
;
2688 TRACE("(%p)\n", This
);
2690 if (!This
->attr_count
) return S_FALSE
;
2693 /* FIXME: support other node types with 'attributes' like DTD */
2694 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2697 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2698 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2704 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2706 xmlreader
*This
= impl_from_IXmlReader(iface
);
2708 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2709 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2710 if (len
) *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2714 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2715 LPCWSTR
*namespaceUri
,
2716 UINT
*namespaceUri_length
)
2718 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2722 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2724 xmlreader
*This
= impl_from_IXmlReader(iface
);
2726 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2727 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2728 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2732 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2734 xmlreader
*This
= impl_from_IXmlReader(iface
);
2736 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2737 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2738 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
2742 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2744 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2745 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2747 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2751 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2756 hr
= IXmlReader_Read(iface
, &type
);
2757 if (FAILED(hr
)) return hr
;
2759 /* return if still pending, partially read values are not reported */
2760 if (is_reader_pending(reader
)) return E_PENDING
;
2765 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2766 if (!ptr
) return E_OUTOFMEMORY
;
2767 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
2773 if (len
) *len
= val
->len
;
2777 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2779 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2780 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2783 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2785 /* Value is already allocated, chunked reads are not possible. */
2786 if (val
->str
) return S_FALSE
;
2790 len
= min(chunk_size
, val
->len
);
2791 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2794 if (read
) *read
= len
;
2800 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2802 UINT
*baseUri_length
)
2804 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
2808 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2810 FIXME("(%p): stub\n", iface
);
2814 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2816 xmlreader
*This
= impl_from_IXmlReader(iface
);
2817 TRACE("(%p)\n", This
);
2818 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2819 when current node is start tag of an element */
2820 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2823 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2825 xmlreader
*This
= impl_from_IXmlReader(iface
);
2827 TRACE("(%p %p)\n", This
, lineNumber
);
2829 if (!lineNumber
) return E_INVALIDARG
;
2831 *lineNumber
= This
->line
;
2836 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2838 xmlreader
*This
= impl_from_IXmlReader(iface
);
2840 TRACE("(%p %p)\n", This
, linePosition
);
2842 if (!linePosition
) return E_INVALIDARG
;
2844 *linePosition
= This
->pos
;
2849 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2851 xmlreader
*This
= impl_from_IXmlReader(iface
);
2853 TRACE("(%p)->(%p)\n", This
, count
);
2855 if (!count
) return E_INVALIDARG
;
2857 *count
= This
->attr_count
;
2861 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2863 xmlreader
*This
= impl_from_IXmlReader(iface
);
2864 TRACE("(%p)->(%p)\n", This
, depth
);
2865 *depth
= This
->depth
;
2869 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2871 FIXME("(%p): stub\n", iface
);
/* Method table for the IXmlReader implementation in this file; CreateXmlReader()
   stores its address in IXmlReader_iface.lpVtbl.  Entries are positional (no
   designators), so their order has to follow the interface declaration. */
2875 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2877 xmlreader_QueryInterface
,
2881 xmlreader_GetProperty
,
2882 xmlreader_SetProperty
,
2884 xmlreader_GetNodeType
,
2885 xmlreader_MoveToFirstAttribute
,
2886 xmlreader_MoveToNextAttribute
,
2887 xmlreader_MoveToAttributeByName
,
2888 xmlreader_MoveToElement
,
2889 xmlreader_GetQualifiedName
,
2890 xmlreader_GetNamespaceUri
,
2891 xmlreader_GetLocalName
,
2892 xmlreader_GetPrefix
,
2894 xmlreader_ReadValueChunk
,
2895 xmlreader_GetBaseUri
,
2896 xmlreader_IsDefault
,
2897 xmlreader_IsEmptyElement
,
2898 xmlreader_GetLineNumber
,
2899 xmlreader_GetLinePosition
,
2900 xmlreader_GetAttributeCount
,
2905 /** IXmlReaderInput **/
2906 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2908 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2910 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2912 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2913 IsEqualGUID(riid
, &IID_IUnknown
))
2919 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2921 return E_NOINTERFACE
;
2924 IUnknown_AddRef(iface
);
2929 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2931 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2932 ULONG ref
= InterlockedIncrement(&This
->ref
);
2933 TRACE("(%p)->(%d)\n", This
, ref
);
2937 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2939 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2940 LONG ref
= InterlockedDecrement(&This
->ref
);
2942 TRACE("(%p)->(%d)\n", This
, ref
);
2946 IMalloc
*imalloc
= This
->imalloc
;
2947 if (This
->input
) IUnknown_Release(This
->input
);
2948 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2949 if (This
->buffer
) free_input_buffer(This
->buffer
);
2950 readerinput_free(This
, This
->baseuri
);
2951 readerinput_free(This
, This
);
2952 if (imalloc
) IMalloc_Release(imalloc
);
/* IUnknown method table of the reader input object: QueryInterface, AddRef
   and Release, in that order. */
2958 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2960 xmlreaderinput_QueryInterface
,
2961 xmlreaderinput_AddRef
,
2962 xmlreaderinput_Release
2965 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2970 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2972 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2974 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2979 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2981 reader
= heap_alloc(sizeof(*reader
));
2982 if(!reader
) return E_OUTOFMEMORY
;
2984 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2986 reader
->input
= NULL
;
2987 reader
->state
= XmlReadState_Closed
;
2988 reader
->instate
= XmlReadInState_Initial
;
2989 reader
->resumestate
= XmlReadResumeState_Initial
;
2990 reader
->dtdmode
= DtdProcessing_Prohibit
;
2991 reader
->resolver
= NULL
;
2992 reader
->line
= reader
->pos
= 0;
2993 reader
->imalloc
= imalloc
;
2994 if (imalloc
) IMalloc_AddRef(imalloc
);
2995 reader
->nodetype
= XmlNodeType_None
;
2996 list_init(&reader
->attrs
);
2997 reader
->attr_count
= 0;
2998 reader
->attr
= NULL
;
2999 list_init(&reader
->elements
);
3001 reader
->max_depth
= 256;
3002 reader
->empty_element
= FALSE
;
3003 memset(reader
->resume
, 0, sizeof(reader
->resume
));
3005 for (i
= 0; i
< StringValue_Last
; i
++)
3006 reader
->strvalues
[i
] = strval_empty
;
3008 *obj
= &reader
->IXmlReader_iface
;
3010 TRACE("returning iface %p\n", *obj
);
3015 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3020 IXmlReaderInput
**ppInput
)
3022 xmlreaderinput
*readerinput
;
3025 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3026 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3028 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3031 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3033 readerinput
= heap_alloc(sizeof(*readerinput
));
3034 if(!readerinput
) return E_OUTOFMEMORY
;
3036 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3037 readerinput
->ref
= 1;
3038 readerinput
->imalloc
= imalloc
;
3039 readerinput
->stream
= NULL
;
3040 if (imalloc
) IMalloc_AddRef(imalloc
);
3041 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3042 readerinput
->hint
= hint
;
3043 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3044 readerinput
->pending
= 0;
3046 hr
= alloc_input_buffer(readerinput
);
3049 readerinput_free(readerinput
, readerinput
->baseuri
);
3050 readerinput_free(readerinput
, readerinput
);
3051 if (imalloc
) IMalloc_Release(imalloc
);
3054 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3056 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3058 TRACE("returning iface %p\n", *ppInput
);