2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW
[] = {'\"',0};
90 static const WCHAR quoteW
[] = {'\'',0};
91 static const WCHAR ltW
[] = {'<',0};
92 static const WCHAR gtW
[] = {'>',0};
93 static const WCHAR commentW
[] = {'<','!','-','-',0};
94 static const WCHAR piW
[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype
)
98 static const char * const type_names
[] =
107 "ProcessingInstruction",
120 if (nodetype
> _XmlNodeType_Last
)
121 return wine_dbg_sprintf("unknown type=%d", nodetype
);
123 return type_names
[nodetype
];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
128 static const char * const prop_names
[] =
140 if (prop
> _XmlReaderProperty_Last
)
141 return wine_dbg_sprintf("unknown property=%d", prop
);
143 return prop_names
[prop
];
146 struct xml_encoding_data
153 static const struct xml_encoding_data xml_encoding_map
[] = {
154 { utf16W
, XmlEncoding_UTF16
, ~0 },
155 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
158 const WCHAR
*get_encoding_name(xml_encoding encoding
)
160 return xml_encoding_map
[encoding
].name
;
167 unsigned int allocated
;
168 unsigned int written
;
171 typedef struct input_buffer input_buffer
;
175 IXmlReaderInput IXmlReaderInput_iface
;
177 /* reference passed on IXmlReaderInput creation, is kept when input is created */
180 xml_encoding encoding
;
183 /* stream reference set after SetInput() call from reader,
184 stored as sequential stream, cause currently
185 optimizations possible with IStream aren't implemented */
186 ISequentialStream
*stream
;
187 input_buffer
*buffer
;
188 unsigned int pending
: 1;
191 static const struct IUnknownVtbl xmlreaderinputvtbl
;
193 /* Structure to hold parsed string of specific length.
195 Reader stores node value as 'start' pointer, on request
196 a null-terminated version of it is allocated.
198 To init a strval variable use reader_init_strval(),
199 to set strval as a reader value use reader_set_strval().
203 WCHAR
*str
; /* allocated null-terminated string */
204 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
205 UINT start
; /* input position where value starts */
208 static WCHAR emptyW
[] = {0};
209 static const strval strval_empty
= { emptyW
};
227 IXmlReader IXmlReader_iface
;
229 xmlreaderinput
*input
;
232 XmlReaderInternalState instate
;
233 XmlReaderResumeState resumestate
;
234 XmlNodeType nodetype
;
235 DtdProcessing dtdmode
;
236 UINT line
, pos
; /* reader position in XML stream */
237 struct list attrs
; /* attributes list for current node */
238 struct attribute
*attr
; /* current attribute */
240 struct list elements
;
241 strval strvalues
[StringValue_Last
];
245 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
250 encoded_buffer utf16
;
251 encoded_buffer encoded
;
253 xmlreaderinput
*input
;
256 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
258 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
261 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
263 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
266 /* reader memory allocation functions */
267 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
269 return m_alloc(reader
->imalloc
, len
);
272 static inline void reader_free(xmlreader
*reader
, void *mem
)
274 m_free(reader
->imalloc
, mem
);
277 /* Just return pointer from offset, no attempt to read more. */
278 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
280 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
281 return (WCHAR
*)buffer
->data
+ offset
;
284 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
286 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
289 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
293 if (src
->str
!= strval_empty
.str
)
295 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
296 if (!dest
->str
) return E_OUTOFMEMORY
;
297 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
298 dest
->str
[dest
->len
] = 0;
305 /* reader input memory allocation functions */
306 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
308 return m_alloc(input
->imalloc
, len
);
311 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
313 return m_realloc(input
->imalloc
, mem
, len
);
316 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
318 m_free(input
->imalloc
, mem
);
321 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
328 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
329 ret
= readerinput_alloc(input
, size
);
330 if (ret
) memcpy(ret
, str
, size
);
336 static void reader_clear_attrs(xmlreader
*reader
)
338 struct attribute
*attr
, *attr2
;
339 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
341 reader_free(reader
, attr
);
343 list_init(&reader
->attrs
);
344 reader
->attr_count
= 0;
348 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
349 while we are on a node with attributes */
350 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
352 struct attribute
*attr
;
354 attr
= reader_alloc(reader
, sizeof(*attr
));
355 if (!attr
) return E_OUTOFMEMORY
;
357 attr
->localname
= *localname
;
358 attr
->value
= *value
;
359 list_add_tail(&reader
->attrs
, &attr
->entry
);
360 reader
->attr_count
++;
365 /* This one frees stored string value if needed */
366 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
368 if (v
->str
!= strval_empty
.str
)
370 reader_free(reader
, v
->str
);
375 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
382 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
384 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
387 /* used to initialize from constant string */
388 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
395 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
397 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
400 static void reader_free_strvalues(xmlreader
*reader
)
403 for (type
= 0; type
< StringValue_Last
; type
++)
404 reader_free_strvalue(reader
, type
);
407 /* This helper should only be used to test if strings are the same,
408 it doesn't try to sort. */
409 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
411 if (str1
->len
!= str2
->len
) return 0;
412 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
415 static void reader_clear_elements(xmlreader
*reader
)
417 struct element
*elem
, *elem2
;
418 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
420 reader_free_strvalued(reader
, &elem
->qname
);
421 reader_free(reader
, elem
);
423 list_init(&reader
->elements
);
424 reader
->empty_element
= FALSE
;
427 static HRESULT
reader_inc_depth(xmlreader
*reader
)
429 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
433 static void reader_dec_depth(xmlreader
*reader
)
435 if (reader
->depth
> 1) reader
->depth
--;
438 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
440 struct element
*elem
;
443 elem
= reader_alloc(reader
, sizeof(*elem
));
444 if (!elem
) return E_OUTOFMEMORY
;
446 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
448 reader_free(reader
, elem
);
452 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
455 reader_free_strvalued(reader
, &elem
->qname
);
456 reader_free(reader
, elem
);
460 if (!list_empty(&reader
->elements
))
462 hr
= reader_inc_depth(reader
);
464 reader_free(reader
, elem
);
469 list_add_head(&reader
->elements
, &elem
->entry
);
470 reader
->empty_element
= FALSE
;
474 static void reader_pop_element(xmlreader
*reader
)
476 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
480 list_remove(&elem
->entry
);
481 reader_free_strvalued(reader
, &elem
->qname
);
482 reader_free_strvalued(reader
, &elem
->localname
);
483 reader_free(reader
, elem
);
484 reader_dec_depth(reader
);
488 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
489 means node value is to be determined. */
490 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
492 strval
*v
= &reader
->strvalues
[type
];
494 reader_free_strvalue(reader
, type
);
503 if (value
->str
== strval_empty
.str
)
507 if (type
== StringValue_Value
)
509 /* defer allocation for value string */
511 v
->start
= value
->start
;
516 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
517 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
518 v
->str
[value
->len
] = 0;
524 static inline int is_reader_pending(xmlreader
*reader
)
526 return reader
->input
->pending
;
529 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
531 const int initial_len
= 0x2000;
532 buffer
->data
= readerinput_alloc(input
, initial_len
);
533 if (!buffer
->data
) return E_OUTOFMEMORY
;
535 memset(buffer
->data
, 0, 4);
537 buffer
->allocated
= initial_len
;
543 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
545 readerinput_free(input
, buffer
->data
);
548 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
550 if (encoding
== XmlEncoding_Unknown
)
552 FIXME("unsupported encoding %d\n", encoding
);
556 *cp
= xml_encoding_map
[encoding
].cp
;
561 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
565 if (!name
) return XmlEncoding_Unknown
;
568 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
575 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
577 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
579 return xml_encoding_map
[n
].enc
;
587 return XmlEncoding_Unknown
;
590 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
592 input_buffer
*buffer
;
595 input
->buffer
= NULL
;
597 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
598 if (!buffer
) return E_OUTOFMEMORY
;
600 buffer
->input
= input
;
601 buffer
->code_page
= ~0; /* code page is unknown at this point */
602 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
604 readerinput_free(input
, buffer
);
608 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
610 free_encoded_buffer(input
, &buffer
->utf16
);
611 readerinput_free(input
, buffer
);
615 input
->buffer
= buffer
;
619 static void free_input_buffer(input_buffer
*buffer
)
621 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
622 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
623 readerinput_free(buffer
->input
, buffer
);
626 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
628 if (readerinput
->stream
) {
629 ISequentialStream_Release(readerinput
->stream
);
630 readerinput
->stream
= NULL
;
634 /* Queries already stored interface for IStream/ISequentialStream.
635 Interface supplied on creation will be overwritten */
636 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
640 readerinput_release_stream(readerinput
);
641 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
643 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
648 /* reads a chunk to raw buffer */
649 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
651 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
652 /* to make sure aligned length won't exceed allocated length */
653 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
657 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
658 variable width encodings like UTF-8 */
659 len
= (len
+ 3) & ~3;
660 /* try to use allocated space or grow */
661 if (buffer
->allocated
- buffer
->written
< len
)
663 buffer
->allocated
*= 2;
664 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
665 len
= buffer
->allocated
- buffer
->written
;
669 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
670 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
671 readerinput
->pending
= hr
== E_PENDING
;
672 if (FAILED(hr
)) return hr
;
673 buffer
->written
+= read
;
678 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
679 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
681 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
683 length
*= sizeof(WCHAR
);
684 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
685 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
687 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
688 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
689 buffer
->allocated
= grown_size
;
693 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
695 static const char startA
[] = {'<','?'};
696 static const char commentA
[] = {'<','!'};
697 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
698 unsigned char *ptr
= (unsigned char*)buffer
->data
;
700 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
701 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
702 /* test start byte */
705 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
706 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
707 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
708 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
712 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
714 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
715 static const WCHAR startW
[] = {'<','?'};
716 static const WCHAR commentW
[] = {'<','!'};
717 static const char utf8bom
[] = {0xef,0xbb,0xbf};
718 static const char utf16lebom
[] = {0xff,0xfe};
720 *enc
= XmlEncoding_Unknown
;
722 if (buffer
->written
<= 3)
724 HRESULT hr
= readerinput_growraw(readerinput
);
725 if (FAILED(hr
)) return hr
;
726 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
729 /* try start symbols if we have enough data to do that, input buffer should contain
730 first chunk already */
731 if (readerinput_is_utf8(readerinput
))
732 *enc
= XmlEncoding_UTF8
;
733 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
734 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
735 *enc
= XmlEncoding_UTF16
;
736 /* try with BOM now */
737 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
739 buffer
->cur
+= sizeof(utf8bom
);
740 *enc
= XmlEncoding_UTF8
;
742 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
744 buffer
->cur
+= sizeof(utf16lebom
);
745 *enc
= XmlEncoding_UTF16
;
751 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
753 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
754 int len
= buffer
->written
;
756 /* complete single byte char */
757 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
759 /* find start byte of multibyte char */
760 while (--len
&& !(buffer
->data
[len
] & 0xc0))
766 /* Returns byte length of complete char sequence for buffer code page,
767 it's relative to current buffer position which is currently used for BOM handling
769 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
771 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
774 if (readerinput
->buffer
->code_page
== CP_UTF8
)
775 len
= readerinput_get_utf8_convlen(readerinput
);
777 len
= buffer
->written
;
779 TRACE("%d\n", len
- buffer
->cur
);
780 return len
- buffer
->cur
;
783 /* It's possible that raw buffer has some leftovers from last conversion - some char
784 sequence that doesn't represent a full code point. Length argument should be calculated with
785 readerinput_get_convlen(), if it's -1 it will be calculated here. */
786 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
788 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
791 len
= readerinput_get_convlen(readerinput
);
793 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
794 /* everything below cur is lost too */
795 buffer
->written
-= len
+ buffer
->cur
;
796 /* after this point we don't need cur offset really,
797 it's used only to mark where actual data begins when first chunk is read */
801 /* note that raw buffer content is kept */
802 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
804 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
805 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
811 hr
= get_code_page(enc
, &cp
);
812 if (FAILED(hr
)) return;
814 readerinput
->buffer
->code_page
= cp
;
815 len
= readerinput_get_convlen(readerinput
);
817 TRACE("switching to cp %d\n", cp
);
819 /* just copy in this case */
820 if (enc
== XmlEncoding_UTF16
)
822 readerinput_grow(readerinput
, len
);
823 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
824 dest
->written
+= len
*sizeof(WCHAR
);
828 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
829 readerinput_grow(readerinput
, dest_len
);
830 ptr
= (WCHAR
*)dest
->data
;
831 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
833 dest
->written
+= dest_len
*sizeof(WCHAR
);
836 /* shrinks parsed data a buffer begins with */
837 static void reader_shrink(xmlreader
*reader
)
839 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
841 /* avoid to move too often using threshold shrink length */
842 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
844 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
845 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
847 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
851 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
852 It won't attempt to shrink but will grow destination buffer if needed */
853 static HRESULT
reader_more(xmlreader
*reader
)
855 xmlreaderinput
*readerinput
= reader
->input
;
856 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
857 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
858 UINT cp
= readerinput
->buffer
->code_page
;
863 /* get some raw data from stream first */
864 hr
= readerinput_growraw(readerinput
);
865 len
= readerinput_get_convlen(readerinput
);
867 /* just copy for UTF-16 case */
870 readerinput_grow(readerinput
, len
);
871 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
872 dest
->written
+= len
*sizeof(WCHAR
);
876 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
877 readerinput_grow(readerinput
, dest_len
);
878 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
879 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
881 dest
->written
+= dest_len
*sizeof(WCHAR
);
882 /* get rid of processed data */
883 readerinput_shrinkraw(readerinput
, len
);
888 static inline UINT
reader_get_cur(xmlreader
*reader
)
890 return reader
->input
->buffer
->utf16
.cur
;
893 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
895 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
896 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
897 if (!*ptr
) reader_more(reader
);
898 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
901 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
903 const WCHAR
*ptr
= reader_get_ptr(reader
);
904 return strncmpW(str
, ptr
, strlenW(str
));
907 /* moves cursor n WCHARs forward */
908 static void reader_skipn(xmlreader
*reader
, int n
)
910 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
911 const WCHAR
*ptr
= reader_get_ptr(reader
);
913 while (*ptr
++ && n
--)
920 static inline BOOL
is_wchar_space(WCHAR ch
)
922 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
925 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
926 static int reader_skipspaces(xmlreader
*reader
)
928 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
929 const WCHAR
*ptr
= reader_get_ptr(reader
);
930 UINT start
= reader_get_cur(reader
);
932 while (is_wchar_space(*ptr
))
936 else if (*ptr
== '\n')
945 ptr
= reader_get_ptr(reader
);
948 return reader_get_cur(reader
) - start
;
951 /* [26] VersionNum ::= '1.' [0-9]+ */
952 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
954 static const WCHAR onedotW
[] = {'1','.',0};
958 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
960 start
= reader_get_cur(reader
);
962 reader_skipn(reader
, 2);
964 ptr2
= ptr
= reader_get_ptr(reader
);
965 while (*ptr
>= '0' && *ptr
<= '9')
967 reader_skipn(reader
, 1);
968 ptr
= reader_get_ptr(reader
);
971 if (ptr2
== ptr
) return WC_E_DIGIT
;
972 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
973 TRACE("version=%s\n", debug_strval(reader
, val
));
977 /* [25] Eq ::= S? '=' S? */
978 static HRESULT
reader_parse_eq(xmlreader
*reader
)
980 static const WCHAR eqW
[] = {'=',0};
981 reader_skipspaces(reader
);
982 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
984 reader_skipn(reader
, 1);
985 reader_skipspaces(reader
);
989 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
990 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
992 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
996 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
998 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
999 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1000 /* skip 'version' */
1001 reader_skipn(reader
, 7);
1003 hr
= reader_parse_eq(reader
);
1004 if (FAILED(hr
)) return hr
;
1006 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1009 reader_skipn(reader
, 1);
1011 hr
= reader_parse_versionnum(reader
, &val
);
1012 if (FAILED(hr
)) return hr
;
1014 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1018 reader_skipn(reader
, 1);
1020 return reader_add_attr(reader
, &name
, &val
);
1023 /* ([A-Za-z0-9._] | '-') */
1024 static inline BOOL
is_wchar_encname(WCHAR ch
)
1026 return ((ch
>= 'A' && ch
<= 'Z') ||
1027 (ch
>= 'a' && ch
<= 'z') ||
1028 (ch
>= '0' && ch
<= '9') ||
1029 (ch
== '.') || (ch
== '_') ||
1033 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1034 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1036 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1040 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1041 return WC_E_ENCNAME
;
1043 val
->start
= reader_get_cur(reader
);
1046 while (is_wchar_encname(*++ptr
))
1050 enc
= parse_encoding_name(start
, len
);
1051 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1055 if (enc
== XmlEncoding_Unknown
)
1056 return WC_E_ENCNAME
;
1058 /* skip encoding name */
1059 reader_skipn(reader
, len
);
1063 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1064 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1066 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1070 if (!reader_skipspaces(reader
)) return S_FALSE
;
1072 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1073 name
.str
= reader_get_ptr(reader
);
1074 name
.start
= reader_get_cur(reader
);
1076 /* skip 'encoding' */
1077 reader_skipn(reader
, 8);
1079 hr
= reader_parse_eq(reader
);
1080 if (FAILED(hr
)) return hr
;
1082 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1085 reader_skipn(reader
, 1);
1087 hr
= reader_parse_encname(reader
, &val
);
1088 if (FAILED(hr
)) return hr
;
1090 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1094 reader_skipn(reader
, 1);
1096 return reader_add_attr(reader
, &name
, &val
);
1099 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1100 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1102 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1103 static const WCHAR yesW
[] = {'y','e','s',0};
1104 static const WCHAR noW
[] = {'n','o',0};
1109 if (!reader_skipspaces(reader
)) return S_FALSE
;
1111 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1112 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1113 /* skip 'standalone' */
1114 reader_skipn(reader
, 10);
1116 hr
= reader_parse_eq(reader
);
1117 if (FAILED(hr
)) return hr
;
1119 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1122 reader_skipn(reader
, 1);
1124 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1125 return WC_E_XMLDECL
;
1127 start
= reader_get_cur(reader
);
1128 /* skip 'yes'|'no' */
1129 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1130 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1131 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1133 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1136 reader_skipn(reader
, 1);
1138 return reader_add_attr(reader
, &name
, &val
);
1141 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1142 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1144 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1145 static const WCHAR declcloseW
[] = {'?','>',0};
1148 /* check if we have "<?xml " */
1149 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1151 reader_skipn(reader
, 5);
1152 hr
= reader_parse_versioninfo(reader
);
1156 hr
= reader_parse_encdecl(reader
);
1160 hr
= reader_parse_sddecl(reader
);
1164 reader_skipspaces(reader
);
1165 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1166 reader_skipn(reader
, 2);
1168 reader_inc_depth(reader
);
1169 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1170 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1171 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1172 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1177 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1178 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1183 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1185 start
= reader
->resume
[XmlReadResume_Body
];
1186 ptr
= reader_get_ptr(reader
);
1191 reader_skipn(reader
, 4);
1192 reader_shrink(reader
);
1193 ptr
= reader_get_ptr(reader
);
1194 start
= reader_get_cur(reader
);
1195 reader
->nodetype
= XmlNodeType_Comment
;
1196 reader
->resume
[XmlReadResume_Body
] = start
;
1197 reader
->resumestate
= XmlReadResumeState_Comment
;
1198 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1199 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1200 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1203 /* will exit when there's no more data, it won't attempt to
1204 read more from stream */
1215 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1216 TRACE("%s\n", debug_strval(reader
, &value
));
1218 /* skip rest of markup '->' */
1219 reader_skipn(reader
, 3);
1221 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1222 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1223 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1224 reader
->resume
[XmlReadResume_Body
] = 0;
1225 reader
->resumestate
= XmlReadResumeState_Initial
;
1229 return WC_E_COMMENT
;
1233 reader_skipn(reader
, 1);
1240 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1241 static inline BOOL
is_char(WCHAR ch
)
1243 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1244 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1245 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1246 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1247 (ch
>= 0xe000 && ch
<= 0xfffd);
1250 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1251 static inline BOOL
is_pubchar(WCHAR ch
)
1253 return (ch
== ' ') ||
1254 (ch
>= 'a' && ch
<= 'z') ||
1255 (ch
>= 'A' && ch
<= 'Z') ||
1256 (ch
>= '0' && ch
<= '9') ||
1257 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1258 (ch
== '=') || (ch
== '?') ||
1259 (ch
== '@') || (ch
== '!') ||
1260 (ch
>= '#' && ch
<= '%') || /* #$% */
1261 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1264 static inline BOOL
is_namestartchar(WCHAR ch
)
1266 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1267 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1268 (ch
>= 0xc0 && ch
<= 0xd6) ||
1269 (ch
>= 0xd8 && ch
<= 0xf6) ||
1270 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1271 (ch
>= 0x370 && ch
<= 0x37d) ||
1272 (ch
>= 0x37f && ch
<= 0x1fff) ||
1273 (ch
>= 0x200c && ch
<= 0x200d) ||
1274 (ch
>= 0x2070 && ch
<= 0x218f) ||
1275 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1276 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1277 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1278 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1279 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1280 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1283 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1284 static inline BOOL
is_ncnamechar(WCHAR ch
)
1286 return (ch
>= 'A' && ch
<= 'Z') ||
1287 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1288 (ch
== '-') || (ch
== '.') ||
1289 (ch
>= '0' && ch
<= '9') ||
1291 (ch
>= 0xc0 && ch
<= 0xd6) ||
1292 (ch
>= 0xd8 && ch
<= 0xf6) ||
1293 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1294 (ch
>= 0x300 && ch
<= 0x36f) ||
1295 (ch
>= 0x370 && ch
<= 0x37d) ||
1296 (ch
>= 0x37f && ch
<= 0x1fff) ||
1297 (ch
>= 0x200c && ch
<= 0x200d) ||
1298 (ch
>= 0x203f && ch
<= 0x2040) ||
1299 (ch
>= 0x2070 && ch
<= 0x218f) ||
1300 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1301 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1302 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1303 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1304 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1305 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1308 static inline BOOL
is_namechar(WCHAR ch
)
1310 return (ch
== ':') || is_ncnamechar(ch
);
1313 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1315 /* When we're on attribute always return attribute type, container node type is kept.
1316 Note that container is not necessarily an element, and attribute doesn't mean it's
1317 an attribute in XML spec terms. */
1318 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1321 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1322 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1323 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1324 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1325 [5] Name ::= NameStartChar (NameChar)* */
/* Parses a Name at the current input position into *name.
   When resume[XmlReadResume_Name] is set the name started at that saved offset.
   On a fresh start the first character must satisfy is_namestartchar(),
   otherwise WC_E_NAMECHARACTER is returned.
   Characters are consumed while is_namechar() holds. If the reader is pending
   the start offset is saved in resume[XmlReadResume_Name]; the slot is later
   cleared and *name is set to the range [start, current position). */
1326 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1331 if (reader
->resume
[XmlReadResume_Name
])
1333 start
= reader
->resume
[XmlReadResume_Name
];
1334 ptr
= reader_get_ptr(reader
);
1338 ptr
= reader_get_ptr(reader
);
1339 start
= reader_get_cur(reader
);
1340 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1343 while (is_namechar(*ptr
))
1345 reader_skipn(reader
, 1);
1346 ptr
= reader_get_ptr(reader
);
1349 if (is_reader_pending(reader
))
1351 reader
->resume
[XmlReadResume_Name
] = start
;
1355 reader
->resume
[XmlReadResume_Name
] = 0;
1357 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1358 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1363 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
/* Parses the target of a processing instruction with reader_parse_name().
   A name failure is reported as E_PENDING while the reader is pending and as
   WC_E_PI otherwise. A name that strval_eq() finds equal to xmlval ("xml")
   is rejected with WC_E_LEADINGXML.
   The name is then walked character by character (presumably looking for
   ':' -- the test itself is not visible here); an offending position gives
   WC_E_PI at index 0 and NC_E_NAMECOLON at any later index. */
1364 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1366 static const WCHAR xmlW
[] = {'x','m','l'};
1367 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1373 hr
= reader_parse_name(reader
, &name
);
1374 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1376 /* now that we got name check for illegal content */
1377 if (strval_eq(reader
, &name
, &xmlval
))
1378 return WC_E_LEADINGXML
;
1380 /* PITarget can't be a qualified name */
1381 ptr
= reader_get_strptr(reader
, &name
);
1382 for (i
= 0; i
< name
.len
; i
++)
1384 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1386 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1391 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
/* Parses a processing instruction; resumable through reader->resumestate.
   Initial:  skips two characters (the opener per the grammar), shrinks the
             input and continues with the target.
   PITarget: parses the target, publishes it as local and qualified name with
             an empty value and records the body start offset in
             resume[XmlReadResume_Body].
   Body:     leading whitespace is stripped from the value. When the body is
             complete two characters are skipped, the node type becomes
             XmlNodeType_ProcessingInstruction, the resume data is reset and
             the value string is stored. Otherwise one character is consumed
             per step. */
1392 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1399 switch (reader
->resumestate
)
1401 case XmlReadResumeState_Initial
:
1403 reader_skipn(reader
, 2);
1404 reader_shrink(reader
);
1405 reader
->resumestate
= XmlReadResumeState_PITarget
;
/* no break here: a fresh PI continues directly with its target */
1406 case XmlReadResumeState_PITarget
:
1407 hr
= reader_parse_pitarget(reader
, &target
);
1408 if (FAILED(hr
)) return hr
;
1409 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1410 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1411 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1412 reader
->resumestate
= XmlReadResumeState_PIBody
;
1413 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1418 start
= reader
->resume
[XmlReadResume_Body
];
1419 ptr
= reader_get_ptr(reader
);
1426 UINT cur
= reader_get_cur(reader
);
1429 /* strip all leading whitespace chars */
1432 ptr
= reader_get_ptr2(reader
, start
);
1433 if (!is_wchar_space(*ptr
)) break;
1437 reader_init_strvalue(start
, cur
-start
, &value
);
1440 reader_skipn(reader
, 2);
1441 TRACE("%s\n", debug_strval(reader
, &value
));
1442 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1443 reader
->resumestate
= XmlReadResumeState_Initial
;
1444 reader
->resume
[XmlReadResume_Body
] = 0;
1445 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1450 reader_skipn(reader
, 1);
1451 ptr
= reader_get_ptr(reader
);
1457 /* This one is used to parse significant whitespace nodes, like in Misc production */
/* Resumable through reader->resumestate:
   Initial    - shrinks the input, remembers the start offset in
                resume[XmlReadResume_Body], sets the node type to
                XmlNodeType_Whitespace and resets local name, qualified name
                and value to the empty string;
   Whitespace - skips spaces and returns S_OK early while the reader is
                pending; otherwise stores [start, current position) as the
                node value and puts resumestate back to Initial. */
1458 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1460 switch (reader
->resumestate
)
1462 case XmlReadResumeState_Initial
:
1463 reader_shrink(reader
);
1464 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1465 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1466 reader
->nodetype
= XmlNodeType_Whitespace
;
1467 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1468 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1469 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1471 case XmlReadResumeState_Whitespace
:
1476 reader_skipspaces(reader
);
1477 if (is_reader_pending(reader
)) return S_OK
;
1479 start
= reader
->resume
[XmlReadResume_Body
];
1480 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1481 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1482 TRACE("%s\n", debug_strval(reader
, &value
));
1483 reader
->resumestate
= XmlReadResumeState_Initial
;
1492 /* [27] Misc ::= Comment | PI | S */
/* Parses Misc items; hr starts out as S_FALSE.
   If a node was interrupted (resumestate != Initial), more input is requested
   with reader_more() and the parser that owns the resume state (PI, comment
   or whitespace) is re-entered; any other resume state is reported via ERR.
   Otherwise the current position selects the parser: a whitespace character
   goes to reader_parse_whitespace(), a commentW match to
   reader_parse_comment() and a piW match to reader_parse_pi().
   A result other than S_FALSE is returned to the caller. */
1493 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1495 HRESULT hr
= S_FALSE
;
1497 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1499 hr
= reader_more(reader
);
1500 if (FAILED(hr
)) return hr
;
1502 /* finish current node */
1503 switch (reader
->resumestate
)
1505 case XmlReadResumeState_PITarget
:
1506 case XmlReadResumeState_PIBody
:
1507 return reader_parse_pi(reader
);
1508 case XmlReadResumeState_Comment
:
1509 return reader_parse_comment(reader
);
1510 case XmlReadResumeState_Whitespace
:
1511 return reader_parse_whitespace(reader
);
1513 ERR("unknown resume state %d\n", reader
->resumestate
);
1519 const WCHAR
*cur
= reader_get_ptr(reader
);
1521 if (is_wchar_space(*cur
))
1522 hr
= reader_parse_whitespace(reader
);
1523 else if (!reader_cmp(reader
, commentW
))
1524 hr
= reader_parse_comment(reader
);
1525 else if (!reader_cmp(reader
, piW
))
1526 hr
= reader_parse_pi(reader
);
1530 if (hr
!= S_FALSE
) return hr
;
1536 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
/* Parses a quoted system literal into *literal, quotes excluded.
   Returns WC_E_QUOTE when the current character is neither '"' nor '\''.
   After the opening quote, characters are consumed while they satisfy
   is_char() and differ from quote; the literal is the consumed range and a
   closing quote is skipped only when one is actually present. */
1537 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1539 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1542 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1545 reader_skipn(reader
, 1);
1547 cur
= reader_get_ptr(reader
);
1548 start
= reader_get_cur(reader
);
1549 while (is_char(*cur
) && *cur
!= quote
)
1551 reader_skipn(reader
, 1);
1552 cur
= reader_get_ptr(reader
);
1554 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1555 if (*cur
== quote
) reader_skipn(reader
, 1);
1557 TRACE("%s\n", debug_strval(reader
, literal
));
1561 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1562 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/* Parses a quoted public identifier literal into *literal, quotes excluded.
   Returns WC_E_QUOTE when the current character is neither '"' nor '\''.
   After the opening quote, characters are consumed while they satisfy
   is_pubchar() and differ from quote; the literal is the consumed range. */
1563 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1565 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1568 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1571 reader_skipn(reader
, 1);
1573 start
= reader_get_cur(reader
);
1574 cur
= reader_get_ptr(reader
);
1575 while (is_pubchar(*cur
) && *cur
!= quote
)
1577 reader_skipn(reader
, 1);
1578 cur
= reader_get_ptr(reader
);
1581 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1582 TRACE("%s\n", debug_strval(reader
, literal
));
1586 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
/* Parses an external identifier and records it as an attribute of the
   current node. The keyword is selected with reader_cmp() against "SYSTEM"
   and "PUBLIC".
   Visible PUBLIC path: skip the 6-character keyword, require whitespace
   (WC_E_WHITESPACE), parse the public literal and add it as an attribute
   named "PUBLIC".
   Visible SYSTEM path: the same steps using reader_parse_sys_literal() and an
   attribute named "SYSTEM".
   NOTE(review): the grammar requires a SystemLiteral after the PubidLiteral,
   but the visible PUBLIC path returns right after reader_add_attr() -- confirm. */
1587 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1589 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1590 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1595 if (reader_cmp(reader
, systemW
))
1597 if (reader_cmp(reader
, publicW
))
1604 reader_skipn(reader
, 6);
1605 cnt
= reader_skipspaces(reader
);
1606 if (!cnt
) return WC_E_WHITESPACE
;
1608 hr
= reader_parse_pub_literal(reader
, &pub
);
1609 if (FAILED(hr
)) return hr
;
1611 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1612 return reader_add_attr(reader
, &name
, &pub
);
1620 reader_skipn(reader
, 6);
1621 cnt
= reader_skipspaces(reader
);
1622 if (!cnt
) return WC_E_WHITESPACE
;
1624 hr
= reader_parse_sys_literal(reader
, &sys
);
1625 if (FAILED(hr
)) return hr
;
1627 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1628 return reader_add_attr(reader
, &name
, &sys
);
1634 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
/* Parses a document type declaration.
   Returns S_FALSE when the input does not start with "<!DOCTYPE" and
   WC_E_DTDPROHIBITED when reader->dtdmode is DtdProcessing_Prohibit.
   Otherwise the 9-character keyword is skipped, whitespace is required
   (WC_E_WHITESPACE), the name is parsed (any failure is reported as
   WC_E_DECLDOCTYPE) and the external id is parsed. The node is published as
   XmlNodeType_DocumentType with the name as local and qualified name.
   Internal subset parsing is not implemented (see the FIXME). */
1635 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1637 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1642 /* check if we have "<!DOCTYPE" */
1643 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1644 reader_shrink(reader
);
1646 /* DTD processing is not allowed by default */
1647 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1649 reader_skipn(reader
, 9);
1650 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1653 hr
= reader_parse_name(reader
, &name
);
1654 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1656 reader_skipspaces(reader
);
1658 hr
= reader_parse_externalid(reader
);
1659 if (FAILED(hr
)) return hr
;
1661 reader_skipspaces(reader
);
1663 cur
= reader_get_ptr(reader
);
1666 FIXME("internal subset parsing not implemented\n");
1671 reader_skipn(reader
, 1);
1673 reader
->nodetype
= XmlNodeType_DocumentType
;
1674 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1675 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1680 /* [11 NS] LocalPart ::= NCName */
/* Parses the local part of a qualified name into *local.
   When resume[XmlReadResume_Local] is set the local part started at that
   saved offset; otherwise it starts at the current position.
   Characters are consumed while is_ncnamechar() holds. If the reader is
   pending the start offset is saved in the resume slot; the slot is later
   cleared and *local is set to [start, current position). */
1681 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1686 if (reader
->resume
[XmlReadResume_Local
])
1688 start
= reader
->resume
[XmlReadResume_Local
];
1689 ptr
= reader_get_ptr(reader
);
1693 ptr
= reader_get_ptr(reader
);
1694 start
= reader_get_cur(reader
);
1697 while (is_ncnamechar(*ptr
))
1699 reader_skipn(reader
, 1);
1700 ptr
= reader_get_ptr(reader
);
1703 if (is_reader_pending(reader
))
1705 reader
->resume
[XmlReadResume_Local
] = start
;
1709 reader
->resume
[XmlReadResume_Local
] = 0;
1711 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1716 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1717 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1718 [9 NS] UnprefixedName ::= LocalPart
1719 [10 NS] Prefix ::= NCName */
/* Parses a qualified name into prefix, local part and full name.
   resume[XmlReadResume_Name] keeps the offset where the name starts and
   resume[XmlReadResume_Local] is set while a local part is in progress, so an
   interrupted parse can be picked up again. On a fresh start the first
   character must satisfy is_ncnamechar(), otherwise NC_E_QNAMECHARACTER is
   returned. E_PENDING is returned when the reader is pending after the
   prefix scan.
   For a prefixed name *prefix is the scanned range and the local part is
   parsed after skipping one separator character; without a prefix *prefix is
   set to (0, 0). The final reader_init_strvalue() call (its target is not
   visible here, presumably *qname) covers prefix, separator and local part,
   or only the local part when the prefix is empty. Both resume slots are
   cleared at the end. */
1720 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1726 if (reader
->resume
[XmlReadResume_Name
])
1728 start
= reader
->resume
[XmlReadResume_Name
];
1729 ptr
= reader_get_ptr(reader
);
1733 ptr
= reader_get_ptr(reader
);
1734 start
= reader_get_cur(reader
);
1735 reader
->resume
[XmlReadResume_Name
] = start
;
1736 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1739 if (reader
->resume
[XmlReadResume_Local
])
1741 hr
= reader_parse_local(reader
, local
);
1742 if (FAILED(hr
)) return hr
;
1744 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1745 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1750 /* skip prefix part */
1751 while (is_ncnamechar(*ptr
))
1753 reader_skipn(reader
, 1);
1754 ptr
= reader_get_ptr(reader
);
1757 if (is_reader_pending(reader
)) return E_PENDING
;
1759 /* got a qualified name */
1762 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1765 reader_skipn(reader
, 1);
1766 hr
= reader_parse_local(reader
, local
);
1767 if (FAILED(hr
)) return hr
;
1771 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1772 reader_init_strvalue(0, 0, prefix
);
1776 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1779 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1781 TRACE("ncname %s\n", debug_strval(reader
, local
));
1783 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1785 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1788 reader
->resume
[XmlReadResume_Name
] = 0;
1789 reader
->resume
[XmlReadResume_Local
] = 0;
1794 /* Applies normalization rules to a single char, used for attribute values.
1796 Rules include 2 steps:
1798 1) replacing \r\n with a single \n;
1799 2) replacing all whitespace chars with ' '. */
/* Works on the character at ptr inside the reader's utf16 buffer.
   Nothing is done unless *ptr is whitespace (is_wchar_space()).
   For a "\r\n" pair, len is the number of bytes that follow the pair in the
   buffer; memmove() shifts them down by one WCHAR so that the '\n' slot at
   ptr+1 is overwritten. */
1802 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1804 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1806 if (!is_wchar_space(*ptr
)) return;
1808 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1810 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1811 memmove(ptr
+1, ptr
+2, len
);
1816 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1818 static const WCHAR entltW
[] = {'l','t'};
1819 static const WCHAR entgtW
[] = {'g','t'};
1820 static const WCHAR entampW
[] = {'a','m','p'};
1821 static const WCHAR entaposW
[] = {'a','p','o','s'};
1822 static const WCHAR entquotW
[] = {'q','u','o','t'};
1823 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1824 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1825 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1826 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1827 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1828 WCHAR
*str
= reader_get_strptr(reader
, name
);
1833 if (strval_eq(reader
, name
, <
)) return '<';
1836 if (strval_eq(reader
, name
, >
)) return '>';
1839 if (strval_eq(reader
, name
, &
))
1841 else if (strval_eq(reader
, name
, &apos
))
1845 if (strval_eq(reader
, name
, "
)) return '\"';
1854 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1855 [67] Reference ::= EntityRef | CharRef
1856 [68] EntityRef ::= '&' Name ';' */
/* Parses a reference at the current position and collapses it in place in
   the utf16 buffer.
   Character references: digits are accumulated into ch in base 16 or base 10.
   A character that is not a valid digit gives WC_E_SEMICOLON when ch is
   already non-zero, otherwise WC_E_HEXDIGIT / WC_E_DIGIT. The resulting value
   must satisfy is_char() (WC_E_XMLCHARACTER); whitespace results are turned
   into ' '.
   Entity references: the name is parsed, must be followed by ';'
   (WC_E_SEMICOLON) and is looked up with get_predefined_entity(); the
   undeclared-entity path logs a FIXME and returns WC_E_UNDECLAREDENTITY.
   On replacement the buffer content after the reference is moved to start+1
   and buffer->cur is set to cur + 1. */
1857 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1859 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1860 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1861 UINT cur
= reader_get_cur(reader
);
1866 reader_skipn(reader
, 1);
1867 ptr
= reader_get_ptr(reader
);
1871 reader_skipn(reader
, 1);
1872 ptr
= reader_get_ptr(reader
);
1874 /* hex char or decimal */
1877 reader_skipn(reader
, 1);
1878 ptr
= reader_get_ptr(reader
);
1882 if ((*ptr
>= '0' && *ptr
<= '9'))
1883 ch
= ch
*16 + *ptr
- '0';
1884 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1885 ch
= ch
*16 + *ptr
- 'a' + 10;
1886 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1887 ch
= ch
*16 + *ptr
- 'A' + 10;
1889 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1890 reader_skipn(reader
, 1);
1891 ptr
= reader_get_ptr(reader
);
1898 if ((*ptr
>= '0' && *ptr
<= '9'))
1900 ch
= ch
*10 + *ptr
- '0';
1901 reader_skipn(reader
, 1);
1902 ptr
= reader_get_ptr(reader
);
1905 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1909 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1912 if (is_wchar_space(ch
)) ch
= ' ';
1914 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1915 memmove(start
+1, ptr
+1, len
);
1916 buffer
->cur
= cur
+ 1;
1925 hr
= reader_parse_name(reader
, &name
);
1926 if (FAILED(hr
)) return hr
;
1928 ptr
= reader_get_ptr(reader
);
1929 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1931 /* predefined entities resolve to a single character */
1932 ch
= get_predefined_entity(reader
, &name
);
1935 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1936 memmove(start
+1, ptr
+1, len
);
1937 buffer
->cur
= cur
+ 1;
1943 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1944 return WC_E_UNDECLAREDENTITY
;
1952 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
/* Parses a quoted attribute value into *value, quotes excluded.
   Returns WC_E_QUOTE when the value does not open with '"' or '\'' and
   WC_E_LESSTHAN when '<' is met inside the value. References are resolved by
   reader_parse_reference(); other characters are passed through
   reader_normalize_space() before being skipped. When the value ends, *value
   is set to [start, current position) and the closing quote is skipped. */
1953 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1958 ptr
= reader_get_ptr(reader
);
1960 /* skip opening quote */
1962 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1963 reader_skipn(reader
, 1);
1965 ptr
= reader_get_ptr(reader
);
1966 start
= reader_get_cur(reader
);
1969 if (*ptr
== '<') return WC_E_LESSTHAN
;
1973 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
1974 /* skip closing quote */
1975 reader_skipn(reader
, 1);
1981 HRESULT hr
= reader_parse_reference(reader
);
1982 if (FAILED(hr
)) return hr
;
1986 reader_normalize_space(reader
, ptr
);
1987 reader_skipn(reader
, 1);
1989 ptr
= reader_get_ptr(reader
);
1995 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1996 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1997 [3 NS] DefaultAttName ::= 'xmlns'
1998 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
/* Parses one attribute: qualified name, reader_parse_eq(), then the quoted
   value. The attribute is stored with reader_add_attr() under its local name.
   Namespace declarations are only detected, not processed: a prefix or a
   qualified name equal to "xmlns" triggers a FIXME. */
1999 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2001 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
2002 strval prefix
, local
, qname
, xmlns
, value
;
2005 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2006 if (FAILED(hr
)) return hr
;
2008 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2010 if (strval_eq(reader
, &prefix
, &xmlns
))
2012 FIXME("namespace definitions not supported\n");
2016 if (strval_eq(reader
, &qname
, &xmlns
))
2017 FIXME("default namespace definitions not supported\n");
2019 hr
= reader_parse_eq(reader
);
2020 if (FAILED(hr
)) return hr
;
2022 hr
= reader_parse_attvalue(reader
, &value
);
2023 if (FAILED(hr
)) return hr
;
2025 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2026 return reader_add_attr(reader
, &local
, &value
);
2029 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2030 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
/* Parses a start tag or an empty-element tag, beginning at its name.
   The qualified name is parsed first. Then, after skipping spaces:
   "/>" sets *empty, consumes two characters and flags reader->empty_element;
   a gtW match consumes one character and pushes the element with
   reader_push_element(); anything else is parsed as an attribute. */
2031 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2035 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2036 if (FAILED(hr
)) return hr
;
2040 static const WCHAR endW
[] = {'/','>',0};
2042 reader_skipspaces(reader
);
2045 if ((*empty
= !reader_cmp(reader
, endW
)))
2048 reader_skipn(reader
, 2);
2049 reader
->empty_element
= TRUE
;
2053 /* got a start tag */
2054 if (!reader_cmp(reader
, gtW
))
2057 reader_skipn(reader
, 1);
2058 return reader_push_element(reader
, qname
, local
);
2061 hr
= reader_parse_attribute(reader
);
2062 if (FAILED(hr
)) return hr
;
2068 /* [39] element ::= EmptyElemTag | STag content ETag */
/* Parses an element; resumable through reader->resumestate.
   Initial: returns S_FALSE unless the input matches ltW, then skips one
            character, shrinks the input and switches to the STag state.
   STag:    parses the start tag (empty elements included). A non-empty
            prefix is rejected with NC_E_UNDECLAREDPREFIX. instate is set to
            XmlReadInState_MiscEnd for an empty element on an empty element
            stack and to XmlReadInState_Content otherwise (presumably in an
            else branch that is not visible here). Finally the node type
            becomes XmlNodeType_Element, the resume state is reset and the
            local name, prefix and qualified name are published. */
2069 static HRESULT
reader_parse_element(xmlreader
*reader
)
2073 switch (reader
->resumestate
)
2075 case XmlReadResumeState_Initial
:
2076 /* check if we are really on element */
2077 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2080 reader_skipn(reader
, 1);
2082 reader_shrink(reader
);
2083 reader
->resumestate
= XmlReadResumeState_STag
;
/* no break here: a fresh element continues directly with its start tag */
2084 case XmlReadResumeState_STag
:
2086 strval qname
, prefix
, local
;
2089 /* this handles empty elements too */
2090 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2091 if (FAILED(hr
)) return hr
;
2093 /* FIXME: need to check for defined namespace to reject invalid prefix,
2094 currently reject all prefixes */
2095 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2097 /* if we got empty element and stack is empty go straight to Misc */
2098 if (empty
&& list_empty(&reader
->elements
))
2099 reader
->instate
= XmlReadInState_MiscEnd
;
2101 reader
->instate
= XmlReadInState_Content
;
2103 reader
->nodetype
= XmlNodeType_Element
;
2104 reader
->resumestate
= XmlReadResumeState_Initial
;
2105 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2106 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2107 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2117 /* [13 NS] ETag ::= '</' QName S? '>' */
/* Parses an end tag. Two characters are skipped, the qualified name is
   parsed, spaces are skipped and a gtW match is required (WC_E_GREATERTHAN).
   The name must be equal to the qname of the element at the head of
   reader->elements (WC_E_ELEMENTMATCH); that element is then popped. When the
   element stack becomes empty the reader moves to XmlReadInState_MiscEnd.
   The node is published as XmlNodeType_EndElement with its local and
   qualified names. */
2118 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2120 strval prefix
, local
, qname
;
2121 struct element
*elem
;
2125 reader_skipn(reader
, 2);
2127 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2128 if (FAILED(hr
)) return hr
;
2130 reader_skipspaces(reader
);
2132 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2135 reader_skipn(reader
, 1);
2137 /* Element stack should never be empty at this point, cause we shouldn't get to
2138 content parsing if it's empty. */
2139 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2140 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2142 reader_pop_element(reader
);
2144 /* It was a root element, the rest is expected as Misc */
2145 if (list_empty(&reader
->elements
))
2146 reader
->instate
= XmlReadInState_MiscEnd
;
2148 reader
->nodetype
= XmlNodeType_EndElement
;
2149 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2150 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2155 /* [18] CDSect ::= CDStart CData CDEnd
2156 [19] CDStart ::= '<![CDATA['
2157 [20] CData ::= (Char* - (Char* ']]>' Char*))
2158 [21] CDEnd ::= ']]>' */
/* Parses a CDATA section; resumable through XmlReadResumeState_CDATA.
   On resume the body start comes from resume[XmlReadResume_Body]. On a fresh
   start the 9-character '<![CDATA[' markup is skipped, the input is shrunk,
   the node type becomes XmlNodeType_CDATA, the body start and resume state
   are recorded and the three string values are reset.
   When "]]>" is found the value is set to [start, current position), three
   characters are skipped, names are set to the empty string and the resume
   data is cleared. Until then '\r' is rewritten to '\n' and one character is
   consumed per step. */
2159 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2164 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2166 start
= reader
->resume
[XmlReadResume_Body
];
2167 ptr
= reader_get_ptr(reader
);
2171 /* skip markup '<![CDATA[' */
2172 reader_skipn(reader
, 9);
2173 reader_shrink(reader
);
2174 ptr
= reader_get_ptr(reader
);
2175 start
= reader_get_cur(reader
);
2176 reader
->nodetype
= XmlNodeType_CDATA
;
2177 reader
->resume
[XmlReadResume_Body
] = start
;
2178 reader
->resumestate
= XmlReadResumeState_CDATA
;
2179 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2180 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2181 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2186 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2190 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2193 reader_skipn(reader
, 3);
2194 TRACE("%s\n", debug_strval(reader
, &value
));
2196 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2197 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2198 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2199 reader
->resume
[XmlReadResume_Body
] = 0;
2200 reader
->resumestate
= XmlReadResumeState_Initial
;
2205 /* Value normalization is not fully implemented, rules are:
2207 - single '\r' -> '\n';
2208 - sequence '\r\n' -> '\n', in this case value length changes;
2210 if (*ptr
== '\r') *ptr
= '\n';
2211 reader_skipn(reader
, 1);
2219 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
/* Parses character data; resumable through XmlReadResumeState_CharData.
   On resume the text start comes from resume[XmlReadResume_Body]. On a fresh
   start the input is shrunk and S_OK is returned immediately when the input
   is empty or begins with '<'; otherwise the node type starts out as
   XmlNodeType_Whitespace or XmlNodeType_Text depending on the first
   character, and the start offset and resume state are recorded.
   "]]>" inside the text is rejected with WC_E_CDSECTEND. When the next markup
   is found the value becomes [start, current position) and the resume data is
   cleared. While scanning, a non-whitespace character switches the node type
   to XmlNodeType_Text. */
2220 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2225 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2227 start
= reader
->resume
[XmlReadResume_Body
];
2228 ptr
= reader_get_ptr(reader
);
2232 reader_shrink(reader
);
2233 ptr
= reader_get_ptr(reader
);
2234 start
= reader_get_cur(reader
);
2235 /* There's no text */
2236 if (!*ptr
|| *ptr
== '<') return S_OK
;
2237 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2238 reader
->resume
[XmlReadResume_Body
] = start
;
2239 reader
->resumestate
= XmlReadResumeState_CharData
;
2240 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2241 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2242 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2247 /* CDATA closing sequence ']]>' is not allowed */
2248 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2249 return WC_E_CDSECTEND
;
2251 /* Found next markup part */
2256 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2257 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2258 reader
->resume
[XmlReadResume_Body
] = 0;
2259 reader
->resumestate
= XmlReadResumeState_Initial
;
2263 reader_skipn(reader
, 1);
2265 /* this covers a case when text has leading whitespace chars */
2266 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2273 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
/* Dispatches parsing of element content.
   An interrupted node (resumestate != Initial) is continued by the parser that
   owns the resume state (CDATA, comment, PI or character data); any other
   resume state is reported via ERR.
   Otherwise the input is shrunk and the markup at the current position picks
   the parser, tested in this order: "</", commentW, piW, "<![CDATA[", "&",
   ltW. Whatever is left is handled as character data. */
2274 static HRESULT
reader_parse_content(xmlreader
*reader
)
2276 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2277 static const WCHAR etagW
[] = {'<','/',0};
2278 static const WCHAR ampW
[] = {'&',0};
2280 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2282 switch (reader
->resumestate
)
2284 case XmlReadResumeState_CDATA
:
2285 return reader_parse_cdata(reader
);
2286 case XmlReadResumeState_Comment
:
2287 return reader_parse_comment(reader
);
2288 case XmlReadResumeState_PIBody
:
2289 case XmlReadResumeState_PITarget
:
2290 return reader_parse_pi(reader
);
2291 case XmlReadResumeState_CharData
:
2292 return reader_parse_chardata(reader
);
2294 ERR("unknown resume state %d\n", reader
->resumestate
);
2298 reader_shrink(reader
);
2300 /* handle end tag here, it indicates end of content as well */
2301 if (!reader_cmp(reader
, etagW
))
2302 return reader_parse_endtag(reader
);
2304 if (!reader_cmp(reader
, commentW
))
2305 return reader_parse_comment(reader
);
2307 if (!reader_cmp(reader
, piW
))
2308 return reader_parse_pi(reader
);
2310 if (!reader_cmp(reader
, cdstartW
))
2311 return reader_parse_cdata(reader
);
2313 if (!reader_cmp(reader
, ampW
))
2314 return reader_parse_reference(reader
);
2316 if (!reader_cmp(reader
, ltW
))
2317 return reader_parse_element(reader
);
2319 /* what's left must be CharData */
2320 return reader_parse_chardata(reader
);
/* Advances the parser by one node, driven by reader->instate.
   Attributes of the previous node are cleared unless the reader is pending.
   Initial:  grows the raw input, detects the stream encoding, switches the
             input to it, parses the XML declaration, shrinks the raw input
             and moves to Misc_DTD; returns right away when the declaration
             parser reported S_OK.
   Misc_DTD, DTD, DTD_Misc: run the Misc / DTD parsers; failures are
             returned, and the visible assignments move instate on towards
             XmlReadInState_Element.
   Element, Content: return the result of the element / content parser.
   MiscEnd:  parses trailing Misc; instate can then become XmlReadInState_Eof.
   States without a handler are reported via FIXME. */
2323 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2327 if (!is_reader_pending(reader
))
2328 reader_clear_attrs(reader
);
2332 switch (reader
->instate
)
2334 /* if it's a first call for a new input we need to detect stream encoding */
2335 case XmlReadInState_Initial
:
2339 hr
= readerinput_growraw(reader
->input
);
2340 if (FAILED(hr
)) return hr
;
2342 /* try to detect encoding by BOM or data and set input code page */
2343 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2344 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2345 if (FAILED(hr
)) return hr
;
2347 /* always switch first time cause we have to put something in */
2348 readerinput_switchencoding(reader
->input
, enc
);
2350 /* parse xml declaration */
2351 hr
= reader_parse_xmldecl(reader
);
2352 if (FAILED(hr
)) return hr
;
2354 readerinput_shrinkraw(reader
->input
, -1);
2355 reader
->instate
= XmlReadInState_Misc_DTD
;
2356 if (hr
== S_OK
) return hr
;
2359 case XmlReadInState_Misc_DTD
:
2360 hr
= reader_parse_misc(reader
);
2361 if (FAILED(hr
)) return hr
;
2364 reader
->instate
= XmlReadInState_DTD
;
2368 case XmlReadInState_DTD
:
2369 hr
= reader_parse_dtd(reader
);
2370 if (FAILED(hr
)) return hr
;
2374 reader
->instate
= XmlReadInState_DTD_Misc
;
2378 reader
->instate
= XmlReadInState_Element
;
2380 case XmlReadInState_DTD_Misc
:
2381 hr
= reader_parse_misc(reader
);
2382 if (FAILED(hr
)) return hr
;
2385 reader
->instate
= XmlReadInState_Element
;
2389 case XmlReadInState_Element
:
2390 return reader_parse_element(reader
);
2391 case XmlReadInState_Content
:
2392 return reader_parse_content(reader
);
2393 case XmlReadInState_MiscEnd
:
2394 hr
= reader_parse_misc(reader
);
2395 if (FAILED(hr
)) return hr
;
2398 reader
->instate
= XmlReadInState_Eof
;
2400 case XmlReadInState_Eof
:
2403 FIXME("internal state %d not handled\n", reader
->instate
);
/* IUnknown::QueryInterface for the reader object.
   IID_IUnknown and IID_IXmlReader are recognised; other interface ids are
   logged with FIXME and answered with E_NOINTERFACE. A reference is taken
   with IXmlReader_AddRef() on the path that follows. */
2411 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2413 xmlreader
*This
= impl_from_IXmlReader(iface
);
2415 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2417 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2418 IsEqualGUID(riid
, &IID_IXmlReader
))
2424 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2426 return E_NOINTERFACE
;
2429 IXmlReader_AddRef(iface
);
/* IUnknown::AddRef: increments the reference count with
   InterlockedIncrement() and traces the new value. */
2434 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2436 xmlreader
*This
= impl_from_IXmlReader(iface
);
2437 ULONG ref
= InterlockedIncrement(&This
->ref
);
2438 TRACE("(%p)->(%d)\n", This
, ref
);
/* IUnknown::Release: decrements the reference count with
   InterlockedDecrement() and traces the new value.
   The visible cleanup sequence (presumably run when the count reaches zero --
   the test is not visible here) releases the input, clears attributes and
   elements, frees the string values and the reader itself, and finally
   releases the allocator, whose pointer was copied to a local before the
   reader memory was freed. */
2442 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2444 xmlreader
*This
= impl_from_IXmlReader(iface
);
2445 LONG ref
= InterlockedDecrement(&This
->ref
);
2447 TRACE("(%p)->(%d)\n", This
, ref
);
2451 IMalloc
*imalloc
= This
->imalloc
;
2452 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2453 reader_clear_attrs(This
);
2454 reader_clear_elements(This
);
2455 reader_free_strvalues(This
);
2456 reader_free(This
, This
);
2457 if (imalloc
) IMalloc_Release(imalloc
);
/* IXmlReader::SetInput.
   The stream and the reference of the current input are released, line and
   position are zeroed, the element stack is cleared and the resume data is
   reset. The "just reset current input" path puts the reader into
   XmlReadState_Initial.
   For a new input, IID_IXmlReaderInput is queried first. Only this module's
   own implementation (lpVtbl == &xmlreaderinputvtbl) is used directly; a
   foreign one is reported via ERR and released. If that gave no usable reader
   input, one is created around the supplied interface with
   CreateXmlReaderInputWithEncodingName(); its failure code is returned.
   Finally the stream is queried from the input and state / instate are set
   to their Initial values. */
2463 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2465 xmlreader
*This
= impl_from_IXmlReader(iface
);
2466 IXmlReaderInput
*readerinput
;
2469 TRACE("(%p)->(%p)\n", This
, input
);
2473 readerinput_release_stream(This
->input
);
2474 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2478 This
->line
= This
->pos
= 0;
2479 reader_clear_elements(This
);
2481 This
->resumestate
= XmlReadResumeState_Initial
;
2482 memset(This
->resume
, 0, sizeof(This
->resume
));
2484 /* just reset current input */
2487 This
->state
= XmlReadState_Initial
;
2491 /* now try IXmlReaderInput, ISequentialStream, IStream */
2492 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2495 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2496 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2499 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2500 readerinput
, readerinput
->lpVtbl
);
2501 IUnknown_Release(readerinput
);
2507 if (hr
!= S_OK
|| !readerinput
)
2509 /* create IXmlReaderInput basing on supplied interface */
2510 hr
= CreateXmlReaderInputWithEncodingName(input
,
2511 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2512 if (hr
!= S_OK
) return hr
;
2513 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2516 /* set stream for supplied IXmlReaderInput */
2517 hr
= readerinput_query_for_stream(This
->input
);
2520 This
->state
= XmlReadState_Initial
;
2521 This
->instate
= XmlReadInState_Initial
;
/* IXmlReader::GetProperty.
   Returns E_INVALIDARG for a NULL value pointer.
   XmlReaderProperty_DtdProcessing stores This->dtdmode in *value and
   XmlReaderProperty_ReadState stores This->state; other properties are
   logged with FIXME. */
2527 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2529 xmlreader
*This
= impl_from_IXmlReader(iface
);
2531 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2533 if (!value
) return E_INVALIDARG
;
2537 case XmlReaderProperty_DtdProcessing
:
2538 *value
= This
->dtdmode
;
2540 case XmlReaderProperty_ReadState
:
2541 *value
= This
->state
;
2544 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::SetProperty.
   XmlReaderProperty_DtdProcessing accepts values from 0 up to
   _DtdProcessing_Last (E_INVALIDARG otherwise) and stores them in
   This->dtdmode; other properties are logged with FIXME. */
2551 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2553 xmlreader
*This
= impl_from_IXmlReader(iface
);
2555 TRACE("(%p)->(%s %lu)\n", This
, debugstr_reader_prop(property
), value
);
2559 case XmlReaderProperty_DtdProcessing
:
2560 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2561 This
->dtdmode
= value
;
2564 FIXME("Unimplemented property (%u)\n", property
);
/* IXmlReader::Read.
   Returns S_FALSE when the reader is in XmlReadState_Closed. Otherwise the
   next node is parsed with reader_parse_nextnode(). When the node type was
   XmlNodeType_None before the call and differs afterwards, the read state
   switches to XmlReadState_Interactive. The resulting node type is traced and
   written to *nodetype. */
2571 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2573 xmlreader
*This
= impl_from_IXmlReader(iface
);
2574 XmlNodeType oldtype
= This
->nodetype
;
2577 TRACE("(%p)->(%p)\n", This
, nodetype
);
2579 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2581 hr
= reader_parse_nextnode(This
);
2582 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2583 This
->state
= XmlReadState_Interactive
;
2586 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2587 *nodetype
= This
->nodetype
;
2593 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2595 xmlreader
*This
= impl_from_IXmlReader(iface
);
2596 TRACE("(%p)->(%p)\n", This
, node_type
);
2598 *node_type
= reader_get_nodetype(This
);
2599 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* IXmlReader::MoveToFirstAttribute.
   Returns S_FALSE when the current node has no attributes. Otherwise the head
   of This->attrs becomes the current attribute and its local name and value
   are published as the current strings. */
2602 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2604 xmlreader
*This
= impl_from_IXmlReader(iface
);
2606 TRACE("(%p)\n", This
);
2608 if (!This
->attr_count
) return S_FALSE
;
2609 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2610 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2611 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2616 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2618 xmlreader
*This
= impl_from_IXmlReader(iface
);
2619 const struct list
*next
;
2621 TRACE("(%p)\n", This
);
2623 if (!This
->attr_count
) return S_FALSE
;
2626 return IXmlReader_MoveToFirstAttribute(iface
);
2628 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2631 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2632 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2633 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2636 return next
? S_OK
: S_FALSE
;
2639 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2641 LPCWSTR namespaceUri
)
2643 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2647 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2649 xmlreader
*This
= impl_from_IXmlReader(iface
);
2650 struct element
*elem
;
2652 TRACE("(%p)\n", This
);
2654 if (!This
->attr_count
) return S_FALSE
;
2657 /* FIXME: support other node types with 'attributes' like DTD */
2658 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2661 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2662 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2668 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2670 xmlreader
*This
= impl_from_IXmlReader(iface
);
2672 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2673 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2674 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2678 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2679 LPCWSTR
*namespaceUri
,
2680 UINT
*namespaceUri_length
)
2682 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2686 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2688 xmlreader
*This
= impl_from_IXmlReader(iface
);
2690 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2691 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2692 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2696 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2698 xmlreader
*This
= impl_from_IXmlReader(iface
);
2700 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2701 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2702 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
2706 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2708 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2709 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2711 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2715 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2720 hr
= IXmlReader_Read(iface
, &type
);
2721 if (FAILED(hr
)) return hr
;
2723 /* return if still pending, partially read values are not reported */
2724 if (is_reader_pending(reader
)) return E_PENDING
;
2729 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2730 if (!ptr
) return E_OUTOFMEMORY
;
2731 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
2737 if (len
) *len
= val
->len
;
2741 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2743 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2744 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2747 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2749 /* Value is already allocated, chunked reads are not possible. */
2750 if (val
->str
) return S_FALSE
;
2754 len
= min(chunk_size
, val
->len
);
2755 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2758 if (read
) *read
= len
;
2764 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2766 UINT
*baseUri_length
)
2768 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
2772 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2774 FIXME("(%p): stub\n", iface
);
2778 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2780 xmlreader
*This
= impl_from_IXmlReader(iface
);
2781 TRACE("(%p)\n", This
);
2782 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2783 when current node is start tag of an element */
2784 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2787 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2789 xmlreader
*This
= impl_from_IXmlReader(iface
);
2791 TRACE("(%p %p)\n", This
, lineNumber
);
2793 if (!lineNumber
) return E_INVALIDARG
;
2795 *lineNumber
= This
->line
;
2800 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2802 xmlreader
*This
= impl_from_IXmlReader(iface
);
2804 TRACE("(%p %p)\n", This
, linePosition
);
2806 if (!linePosition
) return E_INVALIDARG
;
2808 *linePosition
= This
->pos
;
2813 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2815 xmlreader
*This
= impl_from_IXmlReader(iface
);
2817 TRACE("(%p)->(%p)\n", This
, count
);
2819 if (!count
) return E_INVALIDARG
;
2821 *count
= This
->attr_count
;
2825 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2827 xmlreader
*This
= impl_from_IXmlReader(iface
);
2828 TRACE("(%p)->(%p)\n", This
, depth
);
2829 *depth
= This
->depth
;
2833 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2835 FIXME("(%p): stub\n", iface
);
2839 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2841 xmlreader_QueryInterface
,
2845 xmlreader_GetProperty
,
2846 xmlreader_SetProperty
,
2848 xmlreader_GetNodeType
,
2849 xmlreader_MoveToFirstAttribute
,
2850 xmlreader_MoveToNextAttribute
,
2851 xmlreader_MoveToAttributeByName
,
2852 xmlreader_MoveToElement
,
2853 xmlreader_GetQualifiedName
,
2854 xmlreader_GetNamespaceUri
,
2855 xmlreader_GetLocalName
,
2856 xmlreader_GetPrefix
,
2858 xmlreader_ReadValueChunk
,
2859 xmlreader_GetBaseUri
,
2860 xmlreader_IsDefault
,
2861 xmlreader_IsEmptyElement
,
2862 xmlreader_GetLineNumber
,
2863 xmlreader_GetLinePosition
,
2864 xmlreader_GetAttributeCount
,
2869 /** IXmlReaderInput **/
2870 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2872 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2874 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2876 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2877 IsEqualGUID(riid
, &IID_IUnknown
))
2883 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2885 return E_NOINTERFACE
;
2888 IUnknown_AddRef(iface
);
2893 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2895 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2896 ULONG ref
= InterlockedIncrement(&This
->ref
);
2897 TRACE("(%p)->(%d)\n", This
, ref
);
2901 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2903 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2904 LONG ref
= InterlockedDecrement(&This
->ref
);
2906 TRACE("(%p)->(%d)\n", This
, ref
);
2910 IMalloc
*imalloc
= This
->imalloc
;
2911 if (This
->input
) IUnknown_Release(This
->input
);
2912 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2913 if (This
->buffer
) free_input_buffer(This
->buffer
);
2914 readerinput_free(This
, This
->baseuri
);
2915 readerinput_free(This
, This
);
2916 if (imalloc
) IMalloc_Release(imalloc
);
2922 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2924 xmlreaderinput_QueryInterface
,
2925 xmlreaderinput_AddRef
,
2926 xmlreaderinput_Release
2929 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2934 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2936 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2938 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2943 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2945 reader
= heap_alloc(sizeof(*reader
));
2946 if(!reader
) return E_OUTOFMEMORY
;
2948 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2950 reader
->input
= NULL
;
2951 reader
->state
= XmlReadState_Closed
;
2952 reader
->instate
= XmlReadInState_Initial
;
2953 reader
->resumestate
= XmlReadResumeState_Initial
;
2954 reader
->dtdmode
= DtdProcessing_Prohibit
;
2955 reader
->line
= reader
->pos
= 0;
2956 reader
->imalloc
= imalloc
;
2957 if (imalloc
) IMalloc_AddRef(imalloc
);
2958 reader
->nodetype
= XmlNodeType_None
;
2959 list_init(&reader
->attrs
);
2960 reader
->attr_count
= 0;
2961 reader
->attr
= NULL
;
2962 list_init(&reader
->elements
);
2964 reader
->max_depth
= 256;
2965 reader
->empty_element
= FALSE
;
2966 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2968 for (i
= 0; i
< StringValue_Last
; i
++)
2969 reader
->strvalues
[i
] = strval_empty
;
2971 *obj
= &reader
->IXmlReader_iface
;
2973 TRACE("returning iface %p\n", *obj
);
2978 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
2983 IXmlReaderInput
**ppInput
)
2985 xmlreaderinput
*readerinput
;
2988 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
2989 hint
, wine_dbgstr_w(base_uri
), ppInput
);
2991 if (!stream
|| !ppInput
) return E_INVALIDARG
;
2994 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
2996 readerinput
= heap_alloc(sizeof(*readerinput
));
2997 if(!readerinput
) return E_OUTOFMEMORY
;
2999 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3000 readerinput
->ref
= 1;
3001 readerinput
->imalloc
= imalloc
;
3002 readerinput
->stream
= NULL
;
3003 if (imalloc
) IMalloc_AddRef(imalloc
);
3004 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3005 readerinput
->hint
= hint
;
3006 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3007 readerinput
->pending
= 0;
3009 hr
= alloc_input_buffer(readerinput
);
3012 readerinput_free(readerinput
, readerinput
->baseuri
);
3013 readerinput_free(readerinput
, readerinput
);
3014 if (imalloc
) IMalloc_Release(imalloc
);
3017 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3019 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3021 TRACE("returning iface %p\n", *ppInput
);