2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
50 XmlReadInState_Initial
,
51 XmlReadInState_XmlDecl
,
52 XmlReadInState_Misc_DTD
,
54 XmlReadInState_DTD_Misc
,
55 XmlReadInState_Element
,
56 XmlReadInState_Content
,
57 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
59 } XmlReaderInternalState
;
61 /* This state denotes where parsing was interrupted by input problem.
62 Reader resumes parsing using this information. */
65 XmlReadResumeState_Initial
,
66 XmlReadResumeState_PITarget
,
67 XmlReadResumeState_PIBody
,
68 XmlReadResumeState_CDATA
,
69 XmlReadResumeState_Comment
,
70 XmlReadResumeState_STag
,
71 XmlReadResumeState_CharData
,
72 XmlReadResumeState_Whitespace
73 } XmlReaderResumeState
;
75 /* saved pointer index to resume from particular input position */
78 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
79 XmlReadResume_Local
, /* local for QName */
80 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
86 StringValue_LocalName
,
88 StringValue_QualifiedName
,
91 } XmlReaderStringValue
;
93 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
94 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
96 static const WCHAR dblquoteW
[] = {'\"',0};
97 static const WCHAR quoteW
[] = {'\'',0};
98 static const WCHAR ltW
[] = {'<',0};
99 static const WCHAR gtW
[] = {'>',0};
100 static const WCHAR commentW
[] = {'<','!','-','-',0};
101 static const WCHAR piW
[] = {'<','?',0};
103 static const char *debugstr_nodetype(XmlNodeType nodetype
)
105 static const char * const type_names
[] =
114 "ProcessingInstruction",
127 if (nodetype
> _XmlNodeType_Last
)
128 return wine_dbg_sprintf("unknown type=%d", nodetype
);
130 return type_names
[nodetype
];
133 static const char *debugstr_prop(XmlReaderProperty prop
)
135 static const char * const prop_names
[] =
147 if (prop
> _XmlReaderProperty_Last
)
148 return wine_dbg_sprintf("unknown property=%d", prop
);
150 return prop_names
[prop
];
153 struct xml_encoding_data
160 static const struct xml_encoding_data xml_encoding_map
[] = {
161 { utf16W
, XmlEncoding_UTF16
, ~0 },
162 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
169 unsigned int allocated
;
170 unsigned int written
;
173 typedef struct input_buffer input_buffer
;
177 IXmlReaderInput IXmlReaderInput_iface
;
179 /* reference passed on IXmlReaderInput creation, is kept when input is created */
182 xml_encoding encoding
;
185 /* stream reference set after SetInput() call from reader,
186 stored as sequential stream, cause currently
187 optimizations possible with IStream aren't implemented */
188 ISequentialStream
*stream
;
189 input_buffer
*buffer
;
190 unsigned int pending
: 1;
193 static const struct IUnknownVtbl xmlreaderinputvtbl
;
195 /* Structure to hold parsed string of specific length.
197 Reader stores node value as 'start' pointer, on request
198 a null-terminated version of it is allocated.
200 To init a strval variable use reader_init_strval(),
201 to set strval as a reader value use reader_set_strval().
205 WCHAR
*str
; /* allocated null-terminated string */
206 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
207 UINT start
; /* input position where value starts */
210 static WCHAR emptyW
[] = {0};
211 static const strval strval_empty
= { emptyW
};
229 IXmlReader IXmlReader_iface
;
231 xmlreaderinput
*input
;
234 XmlReaderInternalState instate
;
235 XmlReaderResumeState resumestate
;
236 XmlNodeType nodetype
;
237 DtdProcessing dtdmode
;
238 UINT line
, pos
; /* reader position in XML stream */
239 struct list attrs
; /* attributes list for current node */
240 struct attribute
*attr
; /* current attribute */
242 struct list elements
;
243 strval strvalues
[StringValue_Last
];
247 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
252 encoded_buffer utf16
;
253 encoded_buffer encoded
;
255 xmlreaderinput
*input
;
258 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
260 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
263 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
265 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
268 static inline void *m_alloc(IMalloc
*imalloc
, size_t len
)
271 return IMalloc_Alloc(imalloc
, len
);
273 return heap_alloc(len
);
276 static inline void *m_realloc(IMalloc
*imalloc
, void *mem
, size_t len
)
279 return IMalloc_Realloc(imalloc
, mem
, len
);
281 return heap_realloc(mem
, len
);
284 static inline void m_free(IMalloc
*imalloc
, void *mem
)
287 IMalloc_Free(imalloc
, mem
);
292 /* reader memory allocation functions */
293 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
295 return m_alloc(reader
->imalloc
, len
);
298 static inline void reader_free(xmlreader
*reader
, void *mem
)
300 m_free(reader
->imalloc
, mem
);
303 /* Just return pointer from offset, no attempt to read more. */
304 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
306 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
307 return (WCHAR
*)buffer
->data
+ offset
;
310 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
312 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
315 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
319 if (src
->str
!= strval_empty
.str
)
321 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
322 if (!dest
->str
) return E_OUTOFMEMORY
;
323 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
324 dest
->str
[dest
->len
] = 0;
331 /* reader input memory allocation functions */
332 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
334 return m_alloc(input
->imalloc
, len
);
337 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
339 return m_realloc(input
->imalloc
, mem
, len
);
342 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
344 m_free(input
->imalloc
, mem
);
347 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
354 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
355 ret
= readerinput_alloc(input
, size
);
356 if (ret
) memcpy(ret
, str
, size
);
362 static void reader_clear_attrs(xmlreader
*reader
)
364 struct attribute
*attr
, *attr2
;
365 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
367 reader_free(reader
, attr
);
369 list_init(&reader
->attrs
);
370 reader
->attr_count
= 0;
373 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
374 while we are on a node with attributes */
375 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
377 struct attribute
*attr
;
379 attr
= reader_alloc(reader
, sizeof(*attr
));
380 if (!attr
) return E_OUTOFMEMORY
;
382 attr
->localname
= *localname
;
383 attr
->value
= *value
;
384 list_add_tail(&reader
->attrs
, &attr
->entry
);
385 reader
->attr_count
++;
390 /* This one frees stored string value if needed */
391 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
393 if (v
->str
!= strval_empty
.str
)
395 reader_free(reader
, v
->str
);
400 /* returns length in WCHARs from 'start' to current buffer offset */
401 static inline UINT
reader_get_len(const xmlreader
*reader
, UINT start
)
403 return reader
->input
->buffer
->utf16
.cur
- start
;
406 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
413 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
415 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
418 /* used to initialize from constant string */
419 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
426 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
428 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
431 static void reader_free_strvalues(xmlreader
*reader
)
434 for (type
= 0; type
< StringValue_Last
; type
++)
435 reader_free_strvalue(reader
, type
);
438 /* This helper should only be used to test if strings are the same,
439 it doesn't try to sort. */
440 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
442 if (str1
->len
!= str2
->len
) return 0;
443 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
446 static void reader_clear_elements(xmlreader
*reader
)
448 struct element
*elem
, *elem2
;
449 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
451 reader_free_strvalued(reader
, &elem
->qname
);
452 reader_free(reader
, elem
);
454 list_init(&reader
->elements
);
455 reader
->empty_element
= FALSE
;
458 static HRESULT
reader_inc_depth(xmlreader
*reader
)
460 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
464 static void reader_dec_depth(xmlreader
*reader
)
466 if (reader
->depth
> 1) reader
->depth
--;
469 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
471 struct element
*elem
;
474 elem
= reader_alloc(reader
, sizeof(*elem
));
475 if (!elem
) return E_OUTOFMEMORY
;
477 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
479 reader_free(reader
, elem
);
483 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
486 reader_free_strvalued(reader
, &elem
->qname
);
487 reader_free(reader
, elem
);
491 if (!list_empty(&reader
->elements
))
493 hr
= reader_inc_depth(reader
);
495 reader_free(reader
, elem
);
500 list_add_head(&reader
->elements
, &elem
->entry
);
501 reader
->empty_element
= FALSE
;
505 static void reader_pop_element(xmlreader
*reader
)
507 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
511 list_remove(&elem
->entry
);
512 reader_free_strvalued(reader
, &elem
->qname
);
513 reader_free_strvalued(reader
, &elem
->localname
);
514 reader_free(reader
, elem
);
515 reader_dec_depth(reader
);
519 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
520 means node value is to be determined. */
521 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
523 strval
*v
= &reader
->strvalues
[type
];
525 reader_free_strvalue(reader
, type
);
534 if (value
->str
== strval_empty
.str
)
538 if (type
== StringValue_Value
)
540 /* defer allocation for value string */
542 v
->start
= value
->start
;
547 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
548 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
549 v
->str
[value
->len
] = 0;
555 static inline int is_reader_pending(xmlreader
*reader
)
557 return reader
->input
->pending
;
560 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
562 const int initial_len
= 0x2000;
563 buffer
->data
= readerinput_alloc(input
, initial_len
);
564 if (!buffer
->data
) return E_OUTOFMEMORY
;
566 memset(buffer
->data
, 0, 4);
568 buffer
->allocated
= initial_len
;
574 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
576 readerinput_free(input
, buffer
->data
);
579 static HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
581 if (encoding
== XmlEncoding_Unknown
)
583 FIXME("unsupported encoding %d\n", encoding
);
587 *cp
= xml_encoding_map
[encoding
].cp
;
592 static xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
596 if (!name
) return XmlEncoding_Unknown
;
599 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
606 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
608 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
610 return xml_encoding_map
[n
].enc
;
618 return XmlEncoding_Unknown
;
621 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
623 input_buffer
*buffer
;
626 input
->buffer
= NULL
;
628 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
629 if (!buffer
) return E_OUTOFMEMORY
;
631 buffer
->input
= input
;
632 buffer
->code_page
= ~0; /* code page is unknown at this point */
633 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
635 readerinput_free(input
, buffer
);
639 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
641 free_encoded_buffer(input
, &buffer
->utf16
);
642 readerinput_free(input
, buffer
);
646 input
->buffer
= buffer
;
650 static void free_input_buffer(input_buffer
*buffer
)
652 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
653 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
654 readerinput_free(buffer
->input
, buffer
);
657 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
659 if (readerinput
->stream
) {
660 ISequentialStream_Release(readerinput
->stream
);
661 readerinput
->stream
= NULL
;
665 /* Queries already stored interface for IStream/ISequentialStream.
666 Interface supplied on creation will be overwritten */
667 static HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
671 readerinput_release_stream(readerinput
);
672 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
674 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
679 /* reads a chunk to raw buffer */
680 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
682 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
683 /* to make sure aligned length won't exceed allocated length */
684 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
688 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
689 variable width encodings like UTF-8 */
690 len
= (len
+ 3) & ~3;
691 /* try to use allocated space or grow */
692 if (buffer
->allocated
- buffer
->written
< len
)
694 buffer
->allocated
*= 2;
695 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
696 len
= buffer
->allocated
- buffer
->written
;
700 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
701 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
702 readerinput
->pending
= hr
== E_PENDING
;
703 if (FAILED(hr
)) return hr
;
704 buffer
->written
+= read
;
709 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
710 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
712 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
714 length
*= sizeof(WCHAR
);
715 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
716 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
718 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
719 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
720 buffer
->allocated
= grown_size
;
/* Heuristic check of the first bytes of the raw (encoded) buffer: reports a match
   when the buffer starts with the byte pair "<?" or "<!", and otherwise looks at
   the second byte, accepting a non-zero ASCII byte or a byte shaped like the lead
   byte of a 2-, 3- or 4-byte UTF-8 sequence.
   NOTE(review): the three shift tests below read buffer->data[1] rather than the
   unsigned 'ptr' alias declared for exactly this buffer.  If data is a plain char
   buffer (presumably, given the cast) and char is signed, bytes >= 0x80 shift as
   negative values and can never equal 0x6/0xe/0x1e - confirm and consider ptr[1]. */
724 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
726 static const char startA
[] = {'<','?'};
727 static const char commentA
[] = {'<','!'};
728 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
729 unsigned char *ptr
= (unsigned char*)buffer
->data
;
731 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
732 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
733 /* test start byte */
736 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
737 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
738 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
739 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
743 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
745 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
746 static const WCHAR startW
[] = {'<','?'};
747 static const WCHAR commentW
[] = {'<','!'};
748 static const char utf8bom
[] = {0xef,0xbb,0xbf};
749 static const char utf16lebom
[] = {0xff,0xfe};
751 *enc
= XmlEncoding_Unknown
;
753 if (buffer
->written
<= 3)
755 HRESULT hr
= readerinput_growraw(readerinput
);
756 if (FAILED(hr
)) return hr
;
757 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
760 /* try start symbols if we have enough data to do that, input buffer should contain
761 first chunk already */
762 if (readerinput_is_utf8(readerinput
))
763 *enc
= XmlEncoding_UTF8
;
764 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
765 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
766 *enc
= XmlEncoding_UTF16
;
767 /* try with BOM now */
768 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
770 buffer
->cur
+= sizeof(utf8bom
);
771 *enc
= XmlEncoding_UTF8
;
773 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
775 buffer
->cur
+= sizeof(utf16lebom
);
776 *enc
= XmlEncoding_UTF16
;
782 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
784 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
785 int len
= buffer
->written
;
787 /* complete single byte char */
788 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
790 /* find start byte of multibyte char */
791 while (--len
&& !(buffer
->data
[len
] & 0xc0))
797 /* Returns byte length of complete char sequence for buffer code page,
798 it's relative to current buffer position which is currently used for BOM handling
800 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
802 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
805 if (readerinput
->buffer
->code_page
== CP_UTF8
)
806 len
= readerinput_get_utf8_convlen(readerinput
);
808 len
= buffer
->written
;
810 TRACE("%d\n", len
- buffer
->cur
);
811 return len
- buffer
->cur
;
814 /* It's possible that raw buffer has some leftovers from last conversion - some char
815 sequence that doesn't represent a full code point. Length argument should be calculated with
816 readerinput_get_convlen(), if it's -1 it will be calculated here. */
817 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
819 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
822 len
= readerinput_get_convlen(readerinput
);
824 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
825 /* everything below cur is lost too */
826 buffer
->written
-= len
+ buffer
->cur
;
827 /* after this point we don't need cur offset really,
828 it's used only to mark where actual data begins when first chunk is read */
832 /* note that raw buffer content is kept */
833 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
835 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
836 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
842 hr
= get_code_page(enc
, &cp
);
843 if (FAILED(hr
)) return;
845 readerinput
->buffer
->code_page
= cp
;
846 len
= readerinput_get_convlen(readerinput
);
848 TRACE("switching to cp %d\n", cp
);
850 /* just copy in this case */
851 if (enc
== XmlEncoding_UTF16
)
853 readerinput_grow(readerinput
, len
);
854 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
855 dest
->written
+= len
*sizeof(WCHAR
);
859 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
860 readerinput_grow(readerinput
, dest_len
);
861 ptr
= (WCHAR
*)dest
->data
;
862 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
864 dest
->written
+= dest_len
*sizeof(WCHAR
);
867 /* shrinks parsed data a buffer begins with */
868 static void reader_shrink(xmlreader
*reader
)
870 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
872 /* avoid to move too often using threshold shrink length */
873 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
875 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
876 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
878 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
882 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
883 It won't attempt to shrink but will grow destination buffer if needed */
884 static HRESULT
reader_more(xmlreader
*reader
)
886 xmlreaderinput
*readerinput
= reader
->input
;
887 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
888 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
889 UINT cp
= readerinput
->buffer
->code_page
;
894 /* get some raw data from stream first */
895 hr
= readerinput_growraw(readerinput
);
896 len
= readerinput_get_convlen(readerinput
);
898 /* just copy for UTF-16 case */
901 readerinput_grow(readerinput
, len
);
902 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
903 dest
->written
+= len
*sizeof(WCHAR
);
907 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
908 readerinput_grow(readerinput
, dest_len
);
909 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
910 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
912 dest
->written
+= dest_len
*sizeof(WCHAR
);
913 /* get rid of processed data */
914 readerinput_shrinkraw(readerinput
, len
);
919 static inline UINT
reader_get_cur(xmlreader
*reader
)
921 return reader
->input
->buffer
->utf16
.cur
;
924 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
926 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
927 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
928 if (!*ptr
) reader_more(reader
);
929 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
932 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
934 const WCHAR
*ptr
= reader_get_ptr(reader
);
935 return strncmpW(str
, ptr
, strlenW(str
));
938 /* moves cursor n WCHARs forward */
939 static void reader_skipn(xmlreader
*reader
, int n
)
941 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
942 const WCHAR
*ptr
= reader_get_ptr(reader
);
944 while (*ptr
++ && n
--)
951 static inline BOOL
is_wchar_space(WCHAR ch
)
953 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
956 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
957 static int reader_skipspaces(xmlreader
*reader
)
959 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
960 const WCHAR
*ptr
= reader_get_ptr(reader
);
961 UINT start
= reader_get_cur(reader
);
963 while (is_wchar_space(*ptr
))
967 else if (*ptr
== '\n')
976 ptr
= reader_get_ptr(reader
);
979 return reader_get_cur(reader
) - start
;
982 /* [26] VersionNum ::= '1.' [0-9]+ */
983 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
985 static const WCHAR onedotW
[] = {'1','.',0};
989 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
991 start
= reader_get_cur(reader
);
993 reader_skipn(reader
, 2);
995 ptr2
= ptr
= reader_get_ptr(reader
);
996 while (*ptr
>= '0' && *ptr
<= '9')
998 reader_skipn(reader
, 1);
999 ptr
= reader_get_ptr(reader
);
1002 if (ptr2
== ptr
) return WC_E_DIGIT
;
1003 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1004 TRACE("version=%s\n", debug_strval(reader
, val
));
1008 /* [25] Eq ::= S? '=' S? */
1009 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1011 static const WCHAR eqW
[] = {'=',0};
1012 reader_skipspaces(reader
);
1013 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1015 reader_skipn(reader
, 1);
1016 reader_skipspaces(reader
);
1020 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1021 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1023 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1027 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1029 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1030 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1031 /* skip 'version' */
1032 reader_skipn(reader
, 7);
1034 hr
= reader_parse_eq(reader
);
1035 if (FAILED(hr
)) return hr
;
1037 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1040 reader_skipn(reader
, 1);
1042 hr
= reader_parse_versionnum(reader
, &val
);
1043 if (FAILED(hr
)) return hr
;
1045 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1049 reader_skipn(reader
, 1);
1051 return reader_add_attr(reader
, &name
, &val
);
1054 /* ([A-Za-z0-9._] | '-') */
1055 static inline BOOL
is_wchar_encname(WCHAR ch
)
1057 return ((ch
>= 'A' && ch
<= 'Z') ||
1058 (ch
>= 'a' && ch
<= 'z') ||
1059 (ch
>= '0' && ch
<= '9') ||
1060 (ch
== '.') || (ch
== '_') ||
1064 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1065 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1067 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1071 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1072 return WC_E_ENCNAME
;
1075 while (is_wchar_encname(*++ptr
))
1079 enc
= parse_encoding_name(start
, len
);
1080 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1084 if (enc
== XmlEncoding_Unknown
)
1085 return WC_E_ENCNAME
;
1087 /* skip encoding name */
1088 reader_skipn(reader
, len
);
1092 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1093 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1095 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1099 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1101 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1102 name
.str
= reader_get_ptr(reader
);
1104 /* skip 'encoding' */
1105 reader_skipn(reader
, 8);
1107 hr
= reader_parse_eq(reader
);
1108 if (FAILED(hr
)) return hr
;
1110 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1113 reader_skipn(reader
, 1);
1115 hr
= reader_parse_encname(reader
, &val
);
1116 if (FAILED(hr
)) return hr
;
1118 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1122 reader_skipn(reader
, 1);
1124 return reader_add_attr(reader
, &name
, &val
);
1127 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1128 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1130 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1131 static const WCHAR yesW
[] = {'y','e','s',0};
1132 static const WCHAR noW
[] = {'n','o',0};
1137 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1139 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1140 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1141 /* skip 'standalone' */
1142 reader_skipn(reader
, 10);
1144 hr
= reader_parse_eq(reader
);
1145 if (FAILED(hr
)) return hr
;
1147 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1150 reader_skipn(reader
, 1);
1152 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1153 return WC_E_XMLDECL
;
1155 start
= reader_get_cur(reader
);
1156 /* skip 'yes'|'no' */
1157 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1158 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1159 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1161 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1164 reader_skipn(reader
, 1);
1166 return reader_add_attr(reader
, &name
, &val
);
1169 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1170 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1172 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1173 static const WCHAR declcloseW
[] = {'?','>',0};
1176 /* check if we have "<?xml " */
1177 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1179 reader_skipn(reader
, 5);
1180 hr
= reader_parse_versioninfo(reader
);
1184 hr
= reader_parse_encdecl(reader
);
1188 hr
= reader_parse_sddecl(reader
);
1192 reader_skipspaces(reader
);
1193 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1194 reader_skipn(reader
, 2);
1196 reader_inc_depth(reader
);
1197 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1198 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1199 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1200 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1205 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1206 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1211 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1213 start
= reader
->resume
[XmlReadResume_Body
];
1214 ptr
= reader_get_ptr(reader
);
1219 reader_skipn(reader
, 4);
1220 reader_shrink(reader
);
1221 ptr
= reader_get_ptr(reader
);
1222 start
= reader_get_cur(reader
);
1223 reader
->nodetype
= XmlNodeType_Comment
;
1224 reader
->resume
[XmlReadResume_Body
] = start
;
1225 reader
->resumestate
= XmlReadResumeState_Comment
;
1226 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1227 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1228 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1231 /* will exit when there's no more data, it won't attempt to
1232 read more from stream */
1243 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1244 TRACE("%s\n", debug_strval(reader
, &value
));
1246 /* skip rest of markup '->' */
1247 reader_skipn(reader
, 3);
1249 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1250 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1251 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1252 reader
->resume
[XmlReadResume_Body
] = 0;
1253 reader
->resumestate
= XmlReadResumeState_Initial
;
1257 return WC_E_COMMENT
;
1261 reader_skipn(reader
, 1);
1268 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1269 static inline BOOL
is_char(WCHAR ch
)
1271 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1272 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1273 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1274 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1275 (ch
>= 0xe000 && ch
<= 0xfffd);
1278 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1279 static inline BOOL
is_pubchar(WCHAR ch
)
1281 return (ch
== ' ') ||
1282 (ch
>= 'a' && ch
<= 'z') ||
1283 (ch
>= 'A' && ch
<= 'Z') ||
1284 (ch
>= '0' && ch
<= '9') ||
1285 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1286 (ch
== '=') || (ch
== '?') ||
1287 (ch
== '@') || (ch
== '!') ||
1288 (ch
>= '#' && ch
<= '%') || /* #$% */
1289 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1292 static inline BOOL
is_namestartchar(WCHAR ch
)
1294 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1295 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1296 (ch
>= 0xc0 && ch
<= 0xd6) ||
1297 (ch
>= 0xd8 && ch
<= 0xf6) ||
1298 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1299 (ch
>= 0x370 && ch
<= 0x37d) ||
1300 (ch
>= 0x37f && ch
<= 0x1fff) ||
1301 (ch
>= 0x200c && ch
<= 0x200d) ||
1302 (ch
>= 0x2070 && ch
<= 0x218f) ||
1303 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1304 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1305 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1306 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1307 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1308 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1311 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1312 static inline BOOL
is_ncnamechar(WCHAR ch
)
1314 return (ch
>= 'A' && ch
<= 'Z') ||
1315 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1316 (ch
== '-') || (ch
== '.') ||
1317 (ch
>= '0' && ch
<= '9') ||
1319 (ch
>= 0xc0 && ch
<= 0xd6) ||
1320 (ch
>= 0xd8 && ch
<= 0xf6) ||
1321 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1322 (ch
>= 0x300 && ch
<= 0x36f) ||
1323 (ch
>= 0x370 && ch
<= 0x37d) ||
1324 (ch
>= 0x37f && ch
<= 0x1fff) ||
1325 (ch
>= 0x200c && ch
<= 0x200d) ||
1326 (ch
>= 0x203f && ch
<= 0x2040) ||
1327 (ch
>= 0x2070 && ch
<= 0x218f) ||
1328 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1329 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1330 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1331 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1332 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1333 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1336 static inline BOOL
is_namechar(WCHAR ch
)
1338 return (ch
== ':') || is_ncnamechar(ch
);
1341 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1343 /* When we're on attribute always return attribute type, container node type is kept.
1344 Note that container is not necessarily an element, and attribute doesn't mean it's
1345 an attribute in XML spec terms. */
1346 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1349 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1350 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1351 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1352 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1353 [5] Name ::= NameStartChar (NameChar)* */
/* Scans a Name at the current input position and describes it in 'name' as a
   start offset plus length (reader_init_strvalue(), nothing is copied).
   Returns WC_E_NAMECHARACTER when the first character is rejected by
   is_namestartchar(); following characters are consumed one at a time while
   is_namechar() accepts them.
   A non-zero resume[XmlReadResume_Name] is used as the start offset instead
   of the current position. If the reader is pending after the scan the start
   offset is stored in that slot, and the slot is reset to 0 before the name
   is initialised. */
1354 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1359 if (reader
->resume
[XmlReadResume_Name
])
1361 start
= reader
->resume
[XmlReadResume_Name
];
1362 ptr
= reader_get_ptr(reader
);
1366 ptr
= reader_get_ptr(reader
);
1367 start
= reader_get_cur(reader
);
1368 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1371 while (is_namechar(*ptr
))
1373 reader_skipn(reader
, 1);
1374 ptr
= reader_get_ptr(reader
);
1377 if (is_reader_pending(reader
))
1379 reader
->resume
[XmlReadResume_Name
] = start
;
1383 reader
->resume
[XmlReadResume_Name
] = 0;
1385 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1386 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1391 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
/* Parses the target name of a processing instruction.
   A failure of reader_parse_name() is reported as E_PENDING while the reader
   is pending and as WC_E_PI otherwise. A name that strval_eq() finds equal to
   the static "xml" value is rejected with WC_E_LEADINGXML. The name is then
   walked character by character; the error returned from inside that loop is
   WC_E_PI for index 0 and NC_E_NAMECOLON for any later index (presumably on
   meeting ':' - the test itself is not visible here). */
1392 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1394 static const WCHAR xmlW
[] = {'x','m','l'};
1395 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1401 hr
= reader_parse_name(reader
, &name
);
1402 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1404 /* now that we got name check for illegal content */
1405 if (strval_eq(reader
, &name
, &xmlval
))
1406 return WC_E_LEADINGXML
;
1408 /* PITarget can't be a qualified name */
1409 ptr
= reader_get_strptr(reader
, &name
);
1410 for (i
= 0; i
< name
.len
; i
++)
1412 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1414 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1419 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
/* Parses a processing instruction; resumable through reader->resumestate.
   Initial state: skips the 2 characters of opening markup, shrinks the input
   buffer and switches to the PITarget state.
   PITarget state: parses the target, stores it as both local and qualified
   name, sets an empty value, switches to the PIBody state and records the
   current position in resume[XmlReadResume_Body].
   The body is scanned from that recorded position. When it is complete,
   leading whitespace is stripped from the value, 2 characters of closing
   markup are skipped, the node type becomes
   XmlNodeType_ProcessingInstruction, the resume data is reset and the value
   is stored. Otherwise the scan advances one character at a time. */
1420 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1427 switch (reader
->resumestate
)
1429 case XmlReadResumeState_Initial
:
1431 reader_skipn(reader
, 2);
1432 reader_shrink(reader
);
1433 reader
->resumestate
= XmlReadResumeState_PITarget
;
1434 case XmlReadResumeState_PITarget
:
1435 hr
= reader_parse_pitarget(reader
, &target
);
1436 if (FAILED(hr
)) return hr
;
1437 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1438 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1439 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1440 reader
->resumestate
= XmlReadResumeState_PIBody
;
1441 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1446 start
= reader
->resume
[XmlReadResume_Body
];
1447 ptr
= reader_get_ptr(reader
);
1454 UINT cur
= reader_get_cur(reader
);
1457 /* strip all leading whitespace chars */
1460 ptr
= reader_get_ptr2(reader
, start
);
1461 if (!is_wchar_space(*ptr
)) break;
1465 reader_init_strvalue(start
, cur
-start
, &value
);
1468 reader_skipn(reader
, 2);
1469 TRACE("%s\n", debug_strval(reader
, &value
));
1470 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1471 reader
->resumestate
= XmlReadResumeState_Initial
;
1472 reader
->resume
[XmlReadResume_Body
] = 0;
1473 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1478 reader_skipn(reader
, 1);
1479 ptr
= reader_get_ptr(reader
);
1485 /* This one is used to parse significant whitespace nodes, like in Misc production */
/* Resumable through XmlReadResumeState_Whitespace.
   On a fresh start the buffer is shrunk, the current position is recorded in
   resume[XmlReadResume_Body], the node type is set to XmlNodeType_Whitespace
   and local name, qualified name and value are all set to the empty value.
   reader_skipspaces() then consumes the whitespace; while the reader is
   pending the function returns S_OK without finishing the node. Otherwise
   the range from the recorded start to the current position becomes the node
   value and the resume state goes back to Initial. */
1486 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1488 switch (reader
->resumestate
)
1490 case XmlReadResumeState_Initial
:
1491 reader_shrink(reader
);
1492 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1493 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1494 reader
->nodetype
= XmlNodeType_Whitespace
;
1495 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1496 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1497 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1499 case XmlReadResumeState_Whitespace
:
1504 reader_skipspaces(reader
);
1505 if (is_reader_pending(reader
)) return S_OK
;
1507 start
= reader
->resume
[XmlReadResume_Body
];
1508 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1509 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1510 TRACE("%s\n", debug_strval(reader
, &value
));
1511 reader
->resumestate
= XmlReadResumeState_Initial
;
1520 /* [27] Misc ::= Comment | PI | S */
/* Parses one Misc item.
   If an earlier call left a resume state, more input is requested through
   reader_more() (its failure is returned as is) and the interrupted parser -
   PI, comment or whitespace - is re-entered; any other resume state is
   logged with ERR.
   Otherwise the parser is picked from the input: a whitespace character
   selects reader_parse_whitespace(), a match of commentW selects
   reader_parse_comment() and a match of piW selects reader_parse_pi().
   hr starts out as S_FALSE, and any other value is returned immediately. */
1521 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1523 HRESULT hr
= S_FALSE
;
1525 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1527 hr
= reader_more(reader
);
1528 if (FAILED(hr
)) return hr
;
1530 /* finish current node */
1531 switch (reader
->resumestate
)
1533 case XmlReadResumeState_PITarget
:
1534 case XmlReadResumeState_PIBody
:
1535 return reader_parse_pi(reader
);
1536 case XmlReadResumeState_Comment
:
1537 return reader_parse_comment(reader
);
1538 case XmlReadResumeState_Whitespace
:
1539 return reader_parse_whitespace(reader
);
1541 ERR("unknown resume state %d\n", reader
->resumestate
);
1547 const WCHAR
*cur
= reader_get_ptr(reader
);
1549 if (is_wchar_space(*cur
))
1550 hr
= reader_parse_whitespace(reader
);
1551 else if (!reader_cmp(reader
, commentW
))
1552 hr
= reader_parse_comment(reader
);
1553 else if (!reader_cmp(reader
, piW
))
1554 hr
= reader_parse_pi(reader
);
1558 if (hr
!= S_FALSE
) return hr
;
1564 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1565 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1567 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1570 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1573 reader_skipn(reader
, 1);
1575 cur
= reader_get_ptr(reader
);
1576 start
= reader_get_cur(reader
);
1577 while (is_char(*cur
) && *cur
!= quote
)
1579 reader_skipn(reader
, 1);
1580 cur
= reader_get_ptr(reader
);
1582 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1583 if (*cur
== quote
) reader_skipn(reader
, 1);
1585 TRACE("%s\n", debug_strval(reader
, literal
));
1589 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1590 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1591 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1593 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1596 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1599 reader_skipn(reader
, 1);
1601 start
= reader_get_cur(reader
);
1602 cur
= reader_get_ptr(reader
);
1603 while (is_pubchar(*cur
) && *cur
!= quote
)
1605 reader_skipn(reader
, 1);
1606 cur
= reader_get_ptr(reader
);
1609 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1610 TRACE("%s\n", debug_strval(reader
, literal
));
1614 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
/* Parses an external identifier introduced by the SYSTEM or PUBLIC keyword.
   Both branches work the same way: the 6 characters of the keyword are
   skipped, at least one whitespace character must follow (WC_E_WHITESPACE
   otherwise), the literal is parsed and then added with reader_add_attr()
   as an attribute named after the keyword.
   NOTE(review): the PUBLIC branch returns right after adding the public
   literal; the SystemLiteral that production [75] requires after it is not
   parsed in the lines visible here. */
1615 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1617 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1618 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1623 if (reader_cmp(reader
, systemW
))
1625 if (reader_cmp(reader
, publicW
))
1632 reader_skipn(reader
, 6);
1633 cnt
= reader_skipspaces(reader
);
1634 if (!cnt
) return WC_E_WHITESPACE
;
1636 hr
= reader_parse_pub_literal(reader
, &pub
);
1637 if (FAILED(hr
)) return hr
;
1639 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1640 return reader_add_attr(reader
, &name
, &pub
);
1648 reader_skipn(reader
, 6);
1649 cnt
= reader_skipspaces(reader
);
1650 if (!cnt
) return WC_E_WHITESPACE
;
1652 hr
= reader_parse_sys_literal(reader
, &sys
);
1653 if (FAILED(hr
)) return hr
;
1655 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1656 return reader_add_attr(reader
, &name
, &sys
);
1662 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
/* Parses a document type declaration.
   Returns S_FALSE when the input does not start with "<!DOCTYPE" and
   WC_E_DTDPROHIBITED when reader->dtdmode is DtdProcessing_Prohibit.
   After the 9-character keyword, whitespace is mandatory (WC_E_WHITESPACE)
   and so is a name (any name failure is reported as WC_E_DECLDOCTYPE); an
   external ID is parsed next and its failure is returned unchanged.
   Internal subset parsing is not implemented (see the FIXME).
   The node type is set to XmlNodeType_DocumentType and the name is stored as
   both local and qualified name. */
1663 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1665 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1670 /* check if we have "<!DOCTYPE" */
1671 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1672 reader_shrink(reader
);
1674 /* DTD processing is not allowed by default */
1675 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1677 reader_skipn(reader
, 9);
1678 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1681 hr
= reader_parse_name(reader
, &name
);
1682 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1684 reader_skipspaces(reader
);
1686 hr
= reader_parse_externalid(reader
);
1687 if (FAILED(hr
)) return hr
;
1689 reader_skipspaces(reader
);
1691 cur
= reader_get_ptr(reader
);
1694 FIXME("internal subset parsing not implemented\n");
1699 reader_skipn(reader
, 1);
1701 reader
->nodetype
= XmlNodeType_DocumentType
;
1702 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1703 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1708 /* [11 NS] LocalPart ::= NCName */
/* Scans the local part of a qualified name and describes it in 'local' as a
   start offset plus length. Characters are consumed one at a time while
   is_ncnamechar() accepts them.
   A non-zero resume[XmlReadResume_Local] is used as the start offset instead
   of the current position. If the reader is pending after the scan the start
   offset is stored in that slot, and the slot is reset to 0 before 'local'
   is initialised. */
1709 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1714 if (reader
->resume
[XmlReadResume_Local
])
1716 start
= reader
->resume
[XmlReadResume_Local
];
1717 ptr
= reader_get_ptr(reader
);
1721 ptr
= reader_get_ptr(reader
);
1722 start
= reader_get_cur(reader
);
1725 while (is_ncnamechar(*ptr
))
1727 reader_skipn(reader
, 1);
1728 ptr
= reader_get_ptr(reader
);
1731 if (is_reader_pending(reader
))
1733 reader
->resume
[XmlReadResume_Local
] = start
;
1737 reader
->resume
[XmlReadResume_Local
] = 0;
1739 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1744 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1745 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1746 [9 NS] UnprefixedName ::= LocalPart
1747 [10 NS] Prefix ::= NCName */
/* Parses a qualified name and fills in 'prefix', 'local' and 'qname', each as
   a start offset plus length.
   Resumable: resume[XmlReadResume_Name] keeps the offset where the name
   began, and a non-zero resume[XmlReadResume_Local] sends the call straight
   back into reader_parse_local(), after which the prefix is derived from the
   saved name offset and the start of the local part.
   On a fresh start the first character must satisfy is_ncnamechar()
   (NC_E_QNAMECHARACTER otherwise). E_PENDING is returned when the reader is
   pending after the prefix scan. For an unprefixed name 'prefix' is set to
   the (0, 0) value. 'qname' starts at the prefix when there is one (else at
   the local part) and is one character longer than prefix plus local to
   cover the separator. Both resume slots are reset to 0 at the end. */
1748 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1754 if (reader
->resume
[XmlReadResume_Name
])
1756 start
= reader
->resume
[XmlReadResume_Name
];
1757 ptr
= reader_get_ptr(reader
);
1761 ptr
= reader_get_ptr(reader
);
1762 start
= reader_get_cur(reader
);
1763 reader
->resume
[XmlReadResume_Name
] = start
;
1764 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1767 if (reader
->resume
[XmlReadResume_Local
])
1769 hr
= reader_parse_local(reader
, local
);
1770 if (FAILED(hr
)) return hr
;
1772 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1773 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1778 /* skip prefix part */
1779 while (is_ncnamechar(*ptr
))
1781 reader_skipn(reader
, 1);
1782 ptr
= reader_get_ptr(reader
);
1785 if (is_reader_pending(reader
)) return E_PENDING
;
1787 /* got a qualified name */
1790 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1793 reader_skipn(reader
, 1);
1794 hr
= reader_parse_local(reader
, local
);
1795 if (FAILED(hr
)) return hr
;
1799 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1800 reader_init_strvalue(0, 0, prefix
);
1804 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1807 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1809 TRACE("ncname %s\n", debug_strval(reader
, local
));
1811 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1813 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1816 reader
->resume
[XmlReadResume_Name
] = 0;
1817 reader
->resume
[XmlReadResume_Local
] = 0;
1822 /* Applies normalization rules to a single char, used for attribute values.
1824 Rules include 2 steps:
1826 1) replacing \r\n with a single \n;
1827 2) replacing all whitespace chars with ' '.
1830 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1832 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1834 if (!is_wchar_space(*ptr
)) return;
1836 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1838 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1839 memmove(ptr
+1, ptr
+2, len
);
1844 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1846 static const WCHAR entltW
[] = {'l','t'};
1847 static const WCHAR entgtW
[] = {'g','t'};
1848 static const WCHAR entampW
[] = {'a','m','p'};
1849 static const WCHAR entaposW
[] = {'a','p','o','s'};
1850 static const WCHAR entquotW
[] = {'q','u','o','t'};
1851 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1852 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1853 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1854 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1855 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1856 WCHAR
*str
= reader_get_strptr(reader
, name
);
1861 if (strval_eq(reader
, name
, <
)) return '<';
1864 if (strval_eq(reader
, name
, >
)) return '>';
1867 if (strval_eq(reader
, name
, &
))
1869 else if (strval_eq(reader
, name
, &apos
))
1873 if (strval_eq(reader
, name
, "
)) return '\"';
1882 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1883 [67] Reference ::= EntityRef | CharRef
1884 [68] EntityRef ::= '&' Name ';' */
/* Resolves the reference at the current position in place, inside the UTF-16
   input buffer.
   Character references: digits are accumulated into ch in base 16 or base
   10. A character that is not a digit of the expected base yields
   WC_E_SEMICOLON when ch is already non-zero, and WC_E_HEXDIGIT or
   WC_E_DIGIT otherwise. The result has to satisfy is_char()
   (WC_E_XMLCHARACTER), and a whitespace result is replaced by ' '.
   Entity references: a Name is parsed and has to be followed by ';'
   (WC_E_SEMICOLON). Only names known to get_predefined_entity() are
   resolved; anything else is logged and reported as WC_E_UNDECLAREDENTITY.
   In both resolved cases the data following the reference is moved down
   with memmove() so that it starts one WCHAR after 'start', and buffer->cur
   is set to cur + 1. */
1885 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1887 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1888 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1889 UINT cur
= reader_get_cur(reader
);
1894 reader_skipn(reader
, 1);
1895 ptr
= reader_get_ptr(reader
);
1899 reader_skipn(reader
, 1);
1900 ptr
= reader_get_ptr(reader
);
1902 /* hex char or decimal */
1905 reader_skipn(reader
, 1);
1906 ptr
= reader_get_ptr(reader
);
1910 if ((*ptr
>= '0' && *ptr
<= '9'))
1911 ch
= ch
*16 + *ptr
- '0';
1912 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1913 ch
= ch
*16 + *ptr
- 'a' + 10;
1914 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1915 ch
= ch
*16 + *ptr
- 'A' + 10;
1917 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1918 reader_skipn(reader
, 1);
1919 ptr
= reader_get_ptr(reader
);
1926 if ((*ptr
>= '0' && *ptr
<= '9'))
1928 ch
= ch
*10 + *ptr
- '0';
1929 reader_skipn(reader
, 1);
1930 ptr
= reader_get_ptr(reader
);
1933 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1937 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1940 if (is_wchar_space(ch
)) ch
= ' ';
1942 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1943 memmove(start
+1, ptr
+1, len
);
1944 buffer
->cur
= cur
+ 1;
1953 hr
= reader_parse_name(reader
, &name
);
1954 if (FAILED(hr
)) return hr
;
1956 ptr
= reader_get_ptr(reader
);
1957 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1959 /* predefined entities resolve to a single character */
1960 ch
= get_predefined_entity(reader
, &name
);
1963 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1964 memmove(start
+1, ptr
+1, len
);
1965 buffer
->cur
= cur
+ 1;
1971 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1972 return WC_E_UNDECLAREDENTITY
;
1980 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
/* Parses a quoted attribute value and describes the text between the quotes
   in 'value' as a start offset plus length.
   Returns WC_E_QUOTE when the value does not open with '"' or '\'' and
   WC_E_LESSTHAN when '<' is met inside it. References are resolved through
   reader_parse_reference(), whose failure is returned unchanged; other
   characters pass through reader_normalize_space() before being skipped.
   The closing quote is skipped after the value has been recorded. */
1981 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1986 ptr
= reader_get_ptr(reader
);
1988 /* skip opening quote */
1990 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1991 reader_skipn(reader
, 1);
1993 ptr
= reader_get_ptr(reader
);
1994 start
= reader_get_cur(reader
);
1997 if (*ptr
== '<') return WC_E_LESSTHAN
;
2001 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2002 /* skip closing quote */
2003 reader_skipn(reader
, 1);
2009 HRESULT hr
= reader_parse_reference(reader
);
2010 if (FAILED(hr
)) return hr
;
2014 reader_normalize_space(reader
, ptr
);
2015 reader_skipn(reader
, 1);
2017 ptr
= reader_get_ptr(reader
);
2023 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2024 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2025 [3 NS] DefaultAttName ::= 'xmlns'
2026 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
/* Parses one attribute - qualified name, '=', quoted value - and appends it
   with reader_add_attr() under its local name; the prefix is not passed on.
   Namespace declarations are not supported: a prefix equal to "xmlns" and a
   qualified name equal to "xmlns" each trigger a FIXME.
   Failures of the name, '=' and value parsers are returned unchanged. */
2027 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2029 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
2030 strval prefix
, local
, qname
, xmlns
, value
;
2033 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2034 if (FAILED(hr
)) return hr
;
2036 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2038 if (strval_eq(reader
, &prefix
, &xmlns
))
2040 FIXME("namespace definitions not supported\n");
2044 if (strval_eq(reader
, &qname
, &xmlns
))
2046 FIXME("default namespace definitions not supported\n");
2050 hr
= reader_parse_eq(reader
);
2051 if (FAILED(hr
)) return hr
;
2053 hr
= reader_parse_attvalue(reader
, &value
);
2054 if (FAILED(hr
)) return hr
;
2056 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2057 return reader_add_attr(reader
, &local
, &value
);
2060 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2061 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
/* Parses the part of a start tag that follows '<': the qualified name, then
   attributes until the tag is closed.
   "/>" closes an empty element: *empty becomes non-zero, 2 characters are
   skipped and reader->empty_element is set to TRUE.
   A match of gtW closes a start tag: 1 character is skipped and the result
   of reader_push_element() is returned.
   Anything else is handed to reader_parse_attribute(), whose failure is
   returned unchanged. */
2062 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2066 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2067 if (FAILED(hr
)) return hr
;
2071 static const WCHAR endW
[] = {'/','>',0};
2073 reader_skipspaces(reader
);
2076 if ((*empty
= !reader_cmp(reader
, endW
)))
2079 reader_skipn(reader
, 2);
2080 reader
->empty_element
= TRUE
;
2084 /* got a start tag */
2085 if (!reader_cmp(reader
, gtW
))
2088 reader_skipn(reader
, 1);
2089 return reader_push_element(reader
, qname
, local
);
2092 hr
= reader_parse_attribute(reader
);
2093 if (FAILED(hr
)) return hr
;
2099 /* [39] element ::= EmptyElemTag | STag content ETag */
/* Parses the start of an element; resumable through XmlReadResumeState_STag.
   Initial state: returns S_FALSE when the input does not match ltW,
   otherwise skips 1 character, shrinks the buffer and enters the STag state.
   STag state: the tag is parsed with reader_parse_stag(). Any non-empty
   prefix is rejected with NC_E_UNDECLAREDPREFIX. An empty element met while
   the element stack is empty switches instate to XmlReadInState_MiscEnd;
   XmlReadInState_Content is the other value assigned here. Finally the node
   type becomes XmlNodeType_Element, the resume state is reset and local
   name, prefix and qualified name are stored. */
2100 static HRESULT
reader_parse_element(xmlreader
*reader
)
2104 switch (reader
->resumestate
)
2106 case XmlReadResumeState_Initial
:
2107 /* check if we are really on element */
2108 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2111 reader_skipn(reader
, 1);
2113 reader_shrink(reader
);
2114 reader
->resumestate
= XmlReadResumeState_STag
;
2115 case XmlReadResumeState_STag
:
2117 strval qname
, prefix
, local
;
2120 /* this handles empty elements too */
2121 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2122 if (FAILED(hr
)) return hr
;
2124 /* FIXME: need to check for defined namespace to reject invalid prefix,
2125 currently reject all prefixes */
2126 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2128 /* if we got empty element and stack is empty go straight to Misc */
2129 if (empty
&& list_empty(&reader
->elements
))
2130 reader
->instate
= XmlReadInState_MiscEnd
;
2132 reader
->instate
= XmlReadInState_Content
;
2134 reader
->nodetype
= XmlNodeType_Element
;
2135 reader
->resumestate
= XmlReadResumeState_Initial
;
2136 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2137 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2138 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2148 /* [13 NS] ETag ::= '</' QName S? '>' */
/* Parses an end tag: 2 characters of opening markup are skipped, the
   qualified name is parsed, whitespace is skipped and gtW has to follow
   (WC_E_GREATERTHAN otherwise).
   The name must be equal to the qualified name stored for the head of
   reader->elements (WC_E_ELEMENTMATCH otherwise); that element is then
   popped, and when the list becomes empty instate switches to
   XmlReadInState_MiscEnd. The node type becomes XmlNodeType_EndElement and
   local and qualified names are stored. */
2149 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2151 strval prefix
, local
, qname
;
2152 struct element
*elem
;
2156 reader_skipn(reader
, 2);
2158 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2159 if (FAILED(hr
)) return hr
;
2161 reader_skipspaces(reader
);
2163 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2166 reader_skipn(reader
, 1);
2168 /* Element stack should never be empty at this point, cause we shouldn't get to
2169 content parsing if it's empty. */
2170 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2171 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2173 reader_pop_element(reader
);
2175 /* It was a root element, the rest is expected as Misc */
2176 if (list_empty(&reader
->elements
))
2177 reader
->instate
= XmlReadInState_MiscEnd
;
2179 reader
->nodetype
= XmlNodeType_EndElement
;
2180 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2181 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2186 /* [18] CDSect ::= CDStart CData CDEnd
2187 [19] CDStart ::= '<![CDATA['
2188 [20] CData ::= (Char* - (Char* ']]>' Char*))
2189 [21] CDEnd ::= ']]>' */
/* Parses a CDATA section; resumable through XmlReadResumeState_CDATA, with
   the start of the body kept in resume[XmlReadResume_Body].
   On a fresh start the 9 characters of opening markup are skipped, the
   buffer is shrunk, the node type is set to XmlNodeType_CDATA and the three
   string values are reset with NULL.
   When "]]>" is found, the text before it becomes the node value, local and
   qualified names are set to the empty value, the 3 closing characters are
   skipped and the resume data is reset. While scanning, a '\r' is
   overwritten with '\n' before the character is skipped. */
2190 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2195 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2197 start
= reader
->resume
[XmlReadResume_Body
];
2198 ptr
= reader_get_ptr(reader
);
2202 /* skip markup '<![CDATA[' */
2203 reader_skipn(reader
, 9);
2204 reader_shrink(reader
);
2205 ptr
= reader_get_ptr(reader
);
2206 start
= reader_get_cur(reader
);
2207 reader
->nodetype
= XmlNodeType_CDATA
;
2208 reader
->resume
[XmlReadResume_Body
] = start
;
2209 reader
->resumestate
= XmlReadResumeState_CDATA
;
2210 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2211 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2212 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2217 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2221 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2224 reader_skipn(reader
, 3);
2225 TRACE("%s\n", debug_strval(reader
, &value
));
2227 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2228 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2229 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2230 reader
->resume
[XmlReadResume_Body
] = 0;
2231 reader
->resumestate
= XmlReadResumeState_Initial
;
2236 /* Value normalization is not fully implemented, rules are:
2238 - single '\r' -> '\n';
2239 - sequence '\r\n' -> '\n', in this case value length changes;
2241 if (*ptr
== '\r') *ptr
= '\n';
2242 reader_skipn(reader
, 1);
2250 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
/* Parses character data; resumable through XmlReadResumeState_CharData, with
   the start of the text kept in resume[XmlReadResume_Body].
   On a fresh start the buffer is shrunk and S_OK is returned right away,
   without producing a node, when the input is at a NUL character or at '<'.
   The node type starts as XmlNodeType_Whitespace when the first character is
   whitespace and as XmlNodeType_Text otherwise; it is switched to
   XmlNodeType_Text as soon as a non-whitespace character is scanned.
   "]]>" inside the text is an error (WC_E_CDSECTEND). When the next markup
   is found, the scanned range becomes the node value and the resume data is
   reset. */
2251 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2256 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2258 start
= reader
->resume
[XmlReadResume_Body
];
2259 ptr
= reader_get_ptr(reader
);
2263 reader_shrink(reader
);
2264 ptr
= reader_get_ptr(reader
);
2265 start
= reader_get_cur(reader
);
2266 /* There's no text */
2267 if (!*ptr
|| *ptr
== '<') return S_OK
;
2268 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2269 reader
->resume
[XmlReadResume_Body
] = start
;
2270 reader
->resumestate
= XmlReadResumeState_CharData
;
2271 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2272 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2273 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2278 /* CDATA closing sequence ']]>' is not allowed */
2279 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2280 return WC_E_CDSECTEND
;
2282 /* Found next markup part */
2287 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2288 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2289 reader
->resume
[XmlReadResume_Body
] = 0;
2290 reader
->resumestate
= XmlReadResumeState_Initial
;
2294 reader_skipn(reader
, 1);
2296 /* this covers a case when text has leading whitespace chars */
2297 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2304 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
/* Dispatches the parsing of element content.
   A resume state left by an earlier call re-enters the interrupted parser:
   CDATA, comment, PI (target or body) or character data; any other resume
   state is logged with ERR.
   Otherwise the buffer is shrunk and the input is matched, in this order,
   against "</" (end tag), commentW, piW, "<![CDATA[", "&" (reference) and
   ltW (element). Whatever is left is parsed as character data. */
2305 static HRESULT
reader_parse_content(xmlreader
*reader
)
2307 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2308 static const WCHAR etagW
[] = {'<','/',0};
2309 static const WCHAR ampW
[] = {'&',0};
2311 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2313 switch (reader
->resumestate
)
2315 case XmlReadResumeState_CDATA
:
2316 return reader_parse_cdata(reader
);
2317 case XmlReadResumeState_Comment
:
2318 return reader_parse_comment(reader
);
2319 case XmlReadResumeState_PIBody
:
2320 case XmlReadResumeState_PITarget
:
2321 return reader_parse_pi(reader
);
2322 case XmlReadResumeState_CharData
:
2323 return reader_parse_chardata(reader
);
2325 ERR("unknown resume state %d\n", reader
->resumestate
);
2329 reader_shrink(reader
);
2331 /* handle end tag here, it indicates end of content as well */
2332 if (!reader_cmp(reader
, etagW
))
2333 return reader_parse_endtag(reader
);
2335 if (!reader_cmp(reader
, commentW
))
2336 return reader_parse_comment(reader
);
2338 if (!reader_cmp(reader
, piW
))
2339 return reader_parse_pi(reader
);
2341 if (!reader_cmp(reader
, cdstartW
))
2342 return reader_parse_cdata(reader
);
2344 if (!reader_cmp(reader
, ampW
))
2345 return reader_parse_reference(reader
);
2347 if (!reader_cmp(reader
, ltW
))
2348 return reader_parse_element(reader
);
2350 /* what's left must be CharData */
2351 return reader_parse_chardata(reader
);
/* Advances the reader by one node, driven by reader->instate.
   Attributes collected for the previous node are cleared first, unless the
   reader is pending.
   Initial: raw data is read, the encoding is detected and the input is
   switched to it, then the XML declaration is parsed; the raw buffer is
   shrunk and instate moves to Misc_DTD. A declaration result of S_OK is
   returned to the caller as a node of its own.
   Misc_DTD, DTD, DTD_Misc: Misc items and the document type declaration are
   parsed, with instate stepping through DTD and DTD_Misc to Element.
   Element and Content return the result of the matching parser directly.
   MiscEnd parses trailing Misc items and then moves to Eof.
   Any other state is logged with FIXME. */
2354 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2358 if (!is_reader_pending(reader
))
2359 reader_clear_attrs(reader
);
2363 switch (reader
->instate
)
2365 /* if it's a first call for a new input we need to detect stream encoding */
2366 case XmlReadInState_Initial
:
2370 hr
= readerinput_growraw(reader
->input
);
2371 if (FAILED(hr
)) return hr
;
2373 /* try to detect encoding by BOM or data and set input code page */
2374 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2375 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2376 if (FAILED(hr
)) return hr
;
2378 /* always switch first time cause we have to put something in */
2379 readerinput_switchencoding(reader
->input
, enc
);
2381 /* parse xml declaration */
2382 hr
= reader_parse_xmldecl(reader
);
2383 if (FAILED(hr
)) return hr
;
2385 readerinput_shrinkraw(reader
->input
, -1);
2386 reader
->instate
= XmlReadInState_Misc_DTD
;
2387 if (hr
== S_OK
) return hr
;
2390 case XmlReadInState_Misc_DTD
:
2391 hr
= reader_parse_misc(reader
);
2392 if (FAILED(hr
)) return hr
;
2395 reader
->instate
= XmlReadInState_DTD
;
2399 case XmlReadInState_DTD
:
2400 hr
= reader_parse_dtd(reader
);
2401 if (FAILED(hr
)) return hr
;
2405 reader
->instate
= XmlReadInState_DTD_Misc
;
2409 reader
->instate
= XmlReadInState_Element
;
2411 case XmlReadInState_DTD_Misc
:
2412 hr
= reader_parse_misc(reader
);
2413 if (FAILED(hr
)) return hr
;
2416 reader
->instate
= XmlReadInState_Element
;
2420 case XmlReadInState_Element
:
2421 return reader_parse_element(reader
);
2422 case XmlReadInState_Content
:
2423 return reader_parse_content(reader
);
2424 case XmlReadInState_MiscEnd
:
2425 hr
= reader_parse_misc(reader
);
2426 if (FAILED(hr
)) return hr
;
2429 reader
->instate
= XmlReadInState_Eof
;
2431 case XmlReadInState_Eof
:
2434 FIXME("internal state %d not handled\n", reader
->instate
);
/* QueryInterface for the reader object. IID_IUnknown and IID_IXmlReader are
   the two interface ids tested for here; an unsupported id is logged with
   FIXME and answered with E_NOINTERFACE. A reference is added through
   IXmlReader_AddRef() on the path that hands the interface out. */
2442 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2444 xmlreader
*This
= impl_from_IXmlReader(iface
);
2446 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2448 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2449 IsEqualGUID(riid
, &IID_IXmlReader
))
2455 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2457 return E_NOINTERFACE
;
2460 IXmlReader_AddRef(iface
);
2465 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2467 xmlreader
*This
= impl_from_IXmlReader(iface
);
2468 ULONG ref
= InterlockedIncrement(&This
->ref
);
2469 TRACE("(%p)->(%d)\n", This
, ref
);
/* Decrements the reference count of the reader atomically.
   The teardown sequence visible here releases the input object, clears the
   attribute list and the element stack, frees the cached string values and
   then frees the reader itself with reader_free(). The allocator pointer is
   copied into a local first, because it is read from the object that is
   being freed, and is released last if it is non-NULL. */
2473 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2475 xmlreader
*This
= impl_from_IXmlReader(iface
);
2476 LONG ref
= InterlockedDecrement(&This
->ref
);
2478 TRACE("(%p)->(%d)\n", This
, ref
);
2482 IMalloc
*imalloc
= This
->imalloc
;
2483 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2484 reader_clear_attrs(This
);
2485 reader_clear_elements(This
);
2486 reader_free_strvalues(This
);
2487 reader_free(This
, This
);
2488 if (imalloc
) IMalloc_Release(imalloc
);
/* Replaces the input of the reader.
   The current input has its stream released and is then released itself;
   line and position counters are zeroed, the element stack is cleared and
   all resume data is reset. One visible path ("just reset current input")
   only puts the read state back to XmlReadState_Initial.
   The new input is queried for IID_IXmlReaderInput. Only the built-in
   implementation, recognised by its vtable pointer, is used directly; a
   foreign implementation is logged with ERR and released. When no usable
   IXmlReaderInput was obtained, one is created around the supplied interface
   with CreateXmlReaderInputWithEncodingName(), whose failure is returned.
   Finally the stream is queried from the input and both state and instate
   are set to their Initial values. */
2494 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2496 xmlreader
*This
= impl_from_IXmlReader(iface
);
2497 IXmlReaderInput
*readerinput
;
2500 TRACE("(%p)->(%p)\n", This
, input
);
2504 readerinput_release_stream(This
->input
);
2505 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2509 This
->line
= This
->pos
= 0;
2510 reader_clear_elements(This
);
2512 This
->resumestate
= XmlReadResumeState_Initial
;
2513 memset(This
->resume
, 0, sizeof(This
->resume
));
2515 /* just reset current input */
2518 This
->state
= XmlReadState_Initial
;
2522 /* now try IXmlReaderInput, ISequentialStream, IStream */
2523 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2526 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2527 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2530 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2531 readerinput
, readerinput
->lpVtbl
);
2532 IUnknown_Release(readerinput
);
2538 if (hr
!= S_OK
|| !readerinput
)
2540 /* create IXmlReaderInput basing on supplied interface */
2541 hr
= CreateXmlReaderInputWithEncodingName(input
,
2542 NULL
, NULL
, FALSE
, NULL
, &readerinput
);
2543 if (hr
!= S_OK
) return hr
;
2544 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2547 /* set stream for supplied IXmlReaderInput */
2548 hr
= readerinput_query_for_stream(This
->input
);
2551 This
->state
= XmlReadState_Initial
;
2552 This
->instate
= XmlReadInState_Initial
;
/* Reads a reader property into *value.
   Returns E_INVALIDARG when 'value' is NULL.
   XmlReaderProperty_DtdProcessing yields This->dtdmode and
   XmlReaderProperty_ReadState yields This->state; any other property is
   logged with FIXME as unimplemented. */
2558 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2560 xmlreader
*This
= impl_from_IXmlReader(iface
);
2562 TRACE("(%p)->(%s %p)\n", This
, debugstr_prop(property
), value
);
2564 if (!value
) return E_INVALIDARG
;
2568 case XmlReaderProperty_DtdProcessing
:
2569 *value
= This
->dtdmode
;
2571 case XmlReaderProperty_ReadState
:
2572 *value
= This
->state
;
2575 FIXME("Unimplemented property (%u)\n", property
);
/* Changes a reader property.
   XmlReaderProperty_DtdProcessing accepts values from 0 up to and including
   _DtdProcessing_Last and stores them in This->dtdmode; values outside that
   range give E_INVALIDARG. Any other property is logged with FIXME as
   unimplemented. */
2582 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2584 xmlreader
*This
= impl_from_IXmlReader(iface
);
2586 TRACE("(%p)->(%s %lu)\n", This
, debugstr_prop(property
), value
);
2590 case XmlReaderProperty_DtdProcessing
:
2591 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2592 This
->dtdmode
= value
;
2595 FIXME("Unimplemented property (%u)\n", property
);
/* Moves the reader to the next node and reports its type through *nodetype.
   Returns S_FALSE without parsing when the reader is in
   XmlReadState_Closed. The read state becomes XmlReadState_Interactive the
   first time the node type changes away from XmlNodeType_None.
   NOTE(review): in the lines visible here *nodetype is written without a
   NULL check - confirm that callers never pass NULL. */
2602 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2604 xmlreader
*This
= impl_from_IXmlReader(iface
);
2605 XmlNodeType oldtype
= This
->nodetype
;
2608 TRACE("(%p)->(%p)\n", This
, nodetype
);
2610 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2612 hr
= reader_parse_nextnode(This
);
2613 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2614 This
->state
= XmlReadState_Interactive
;
2617 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2618 *nodetype
= This
->nodetype
;
2624 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2626 xmlreader
*This
= impl_from_IXmlReader(iface
);
2627 TRACE("(%p)->(%p)\n", This
, node_type
);
2629 *node_type
= reader_get_nodetype(This
);
2630 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2633 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2635 xmlreader
*This
= impl_from_IXmlReader(iface
);
2637 TRACE("(%p)\n", This
);
2639 if (!This
->attr_count
) return S_FALSE
;
2640 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2641 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2642 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2647 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2649 xmlreader
*This
= impl_from_IXmlReader(iface
);
2650 const struct list
*next
;
2652 TRACE("(%p)\n", This
);
2654 if (!This
->attr_count
) return S_FALSE
;
2657 return IXmlReader_MoveToFirstAttribute(iface
);
2659 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2662 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2663 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2664 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2667 return next
? S_OK
: S_FALSE
;
2670 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2672 LPCWSTR namespaceUri
)
2674 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2678 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2680 xmlreader
*This
= impl_from_IXmlReader(iface
);
2681 struct element
*elem
;
2683 TRACE("(%p)\n", This
);
2685 if (!This
->attr_count
) return S_FALSE
;
2688 /* FIXME: support other node types with 'attributes' like DTD */
2689 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2692 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2693 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2699 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2701 xmlreader
*This
= impl_from_IXmlReader(iface
);
2703 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2704 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2705 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2709 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2710 LPCWSTR
*namespaceUri
,
2711 UINT
*namespaceUri_length
)
2713 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2717 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2719 xmlreader
*This
= impl_from_IXmlReader(iface
);
2721 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2722 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2723 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2727 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2729 xmlreader
*This
= impl_from_IXmlReader(iface
);
2731 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2732 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2733 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
2737 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2739 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2740 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2742 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2746 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2751 hr
= IXmlReader_Read(iface
, &type
);
2752 if (FAILED(hr
)) return hr
;
2754 /* return if still pending, partially read values are not reported */
2755 if (is_reader_pending(reader
)) return E_PENDING
;
2760 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2761 if (!ptr
) return E_OUTOFMEMORY
;
2762 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
2768 if (len
) *len
= val
->len
;
2772 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2774 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2775 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2778 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2780 /* Value is already allocated, chunked reads are not possible. */
2781 if (val
->str
) return S_FALSE
;
2785 len
= min(chunk_size
, val
->len
);
2786 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2789 if (read
) *read
= len
;
2795 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2797 UINT
*baseUri_length
)
2799 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
2803 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2805 FIXME("(%p): stub\n", iface
);
2809 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2811 xmlreader
*This
= impl_from_IXmlReader(iface
);
2812 TRACE("(%p)\n", This
);
2813 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2814 when current node is start tag of an element */
2815 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2818 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2820 xmlreader
*This
= impl_from_IXmlReader(iface
);
2822 TRACE("(%p %p)\n", This
, lineNumber
);
2824 if (!lineNumber
) return E_INVALIDARG
;
2826 *lineNumber
= This
->line
;
2831 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2833 xmlreader
*This
= impl_from_IXmlReader(iface
);
2835 TRACE("(%p %p)\n", This
, linePosition
);
2837 if (!linePosition
) return E_INVALIDARG
;
2839 *linePosition
= This
->pos
;
2844 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2846 xmlreader
*This
= impl_from_IXmlReader(iface
);
2848 TRACE("(%p)->(%p)\n", This
, count
);
2850 if (!count
) return E_INVALIDARG
;
2852 *count
= This
->attr_count
;
2856 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2858 xmlreader
*This
= impl_from_IXmlReader(iface
);
2859 TRACE("(%p)->(%p)\n", This
, depth
);
2860 *depth
= This
->depth
;
2864 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2866 FIXME("(%p): stub\n", iface
);
2870 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2872 xmlreader_QueryInterface
,
2876 xmlreader_GetProperty
,
2877 xmlreader_SetProperty
,
2879 xmlreader_GetNodeType
,
2880 xmlreader_MoveToFirstAttribute
,
2881 xmlreader_MoveToNextAttribute
,
2882 xmlreader_MoveToAttributeByName
,
2883 xmlreader_MoveToElement
,
2884 xmlreader_GetQualifiedName
,
2885 xmlreader_GetNamespaceUri
,
2886 xmlreader_GetLocalName
,
2887 xmlreader_GetPrefix
,
2889 xmlreader_ReadValueChunk
,
2890 xmlreader_GetBaseUri
,
2891 xmlreader_IsDefault
,
2892 xmlreader_IsEmptyElement
,
2893 xmlreader_GetLineNumber
,
2894 xmlreader_GetLinePosition
,
2895 xmlreader_GetAttributeCount
,
2900 /** IXmlReaderInput **/
2901 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2903 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2905 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2907 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2908 IsEqualGUID(riid
, &IID_IUnknown
))
2914 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2916 return E_NOINTERFACE
;
2919 IUnknown_AddRef(iface
);
2924 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2926 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2927 ULONG ref
= InterlockedIncrement(&This
->ref
);
2928 TRACE("(%p)->(%d)\n", This
, ref
);
2932 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2934 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2935 LONG ref
= InterlockedDecrement(&This
->ref
);
2937 TRACE("(%p)->(%d)\n", This
, ref
);
2941 IMalloc
*imalloc
= This
->imalloc
;
2942 if (This
->input
) IUnknown_Release(This
->input
);
2943 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2944 if (This
->buffer
) free_input_buffer(This
->buffer
);
2945 readerinput_free(This
, This
->baseuri
);
2946 readerinput_free(This
, This
);
2947 if (imalloc
) IMalloc_Release(imalloc
);
2953 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2955 xmlreaderinput_QueryInterface
,
2956 xmlreaderinput_AddRef
,
2957 xmlreaderinput_Release
2960 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2965 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2967 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2969 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2974 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2976 reader
= heap_alloc(sizeof(*reader
));
2977 if(!reader
) return E_OUTOFMEMORY
;
2979 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2981 reader
->input
= NULL
;
2982 reader
->state
= XmlReadState_Closed
;
2983 reader
->instate
= XmlReadInState_Initial
;
2984 reader
->resumestate
= XmlReadResumeState_Initial
;
2985 reader
->dtdmode
= DtdProcessing_Prohibit
;
2986 reader
->line
= reader
->pos
= 0;
2987 reader
->imalloc
= imalloc
;
2988 if (imalloc
) IMalloc_AddRef(imalloc
);
2989 reader
->nodetype
= XmlNodeType_None
;
2990 list_init(&reader
->attrs
);
2991 reader
->attr_count
= 0;
2992 reader
->attr
= NULL
;
2993 list_init(&reader
->elements
);
2995 reader
->max_depth
= 256;
2996 reader
->empty_element
= FALSE
;
2997 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2999 for (i
= 0; i
< StringValue_Last
; i
++)
3000 reader
->strvalues
[i
] = strval_empty
;
3002 *obj
= &reader
->IXmlReader_iface
;
3004 TRACE("returning iface %p\n", *obj
);
3009 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3014 IXmlReaderInput
**ppInput
)
3016 xmlreaderinput
*readerinput
;
3019 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3020 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3022 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3025 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3027 readerinput
= heap_alloc(sizeof(*readerinput
));
3028 if(!readerinput
) return E_OUTOFMEMORY
;
3030 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3031 readerinput
->ref
= 1;
3032 readerinput
->imalloc
= imalloc
;
3033 readerinput
->stream
= NULL
;
3034 if (imalloc
) IMalloc_AddRef(imalloc
);
3035 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3036 readerinput
->hint
= hint
;
3037 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3038 readerinput
->pending
= 0;
3040 hr
= alloc_input_buffer(readerinput
);
3043 readerinput_free(readerinput
, readerinput
->baseuri
);
3044 readerinput_free(readerinput
, readerinput
);
3045 if (imalloc
) IMalloc_Release(imalloc
);
3048 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3050 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3052 TRACE("returning iface %p\n", *ppInput
);