2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
87 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
89 static const WCHAR dblquoteW
[] = {'\"',0};
90 static const WCHAR quoteW
[] = {'\'',0};
91 static const WCHAR ltW
[] = {'<',0};
92 static const WCHAR gtW
[] = {'>',0};
93 static const WCHAR commentW
[] = {'<','!','-','-',0};
94 static const WCHAR piW
[] = {'<','?',0};
96 static const char *debugstr_nodetype(XmlNodeType nodetype
)
98 static const char * const type_names
[] =
107 "ProcessingInstruction",
120 if (nodetype
> _XmlNodeType_Last
)
121 return wine_dbg_sprintf("unknown type=%d", nodetype
);
123 return type_names
[nodetype
];
126 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
128 static const char * const prop_names
[] =
140 if (prop
> _XmlReaderProperty_Last
)
141 return wine_dbg_sprintf("unknown property=%d", prop
);
143 return prop_names
[prop
];
146 struct xml_encoding_data
153 static const struct xml_encoding_data xml_encoding_map
[] = {
154 { utf16W
, XmlEncoding_UTF16
, ~0 },
155 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
158 const WCHAR
*get_encoding_name(xml_encoding encoding
)
160 return xml_encoding_map
[encoding
].name
;
163 xml_encoding
get_encoding_from_codepage(UINT codepage
)
166 for (i
= 0; i
< sizeof(xml_encoding_map
)/sizeof(xml_encoding_map
[0]); i
++)
168 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
170 return XmlEncoding_Unknown
;
177 unsigned int allocated
;
178 unsigned int written
;
181 typedef struct input_buffer input_buffer
;
185 IXmlReaderInput IXmlReaderInput_iface
;
187 /* reference passed on IXmlReaderInput creation, is kept when input is created */
190 xml_encoding encoding
;
193 /* stream reference set after SetInput() call from reader,
194 stored as a sequential stream, because the optimizations
195 possible with IStream aren't implemented yet */
196 ISequentialStream
*stream
;
197 input_buffer
*buffer
;
198 unsigned int pending
: 1;
201 static const struct IUnknownVtbl xmlreaderinputvtbl
;
203 /* Structure to hold parsed string of specific length.
205 Reader stores node value as 'start' pointer, on request
206 a null-terminated version of it is allocated.
208 To init a strval variable use reader_init_strval(),
209 to set strval as a reader value use reader_set_strval().
213 WCHAR
*str
; /* allocated null-terminated string */
214 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
215 UINT start
; /* input position where value starts */
218 static WCHAR emptyW
[] = {0};
219 static const strval strval_empty
= { emptyW
};
237 IXmlReader IXmlReader_iface
;
239 xmlreaderinput
*input
;
242 XmlReaderInternalState instate
;
243 XmlReaderResumeState resumestate
;
244 XmlNodeType nodetype
;
245 DtdProcessing dtdmode
;
246 UINT line
, pos
; /* reader position in XML stream */
247 struct list attrs
; /* attributes list for current node */
248 struct attribute
*attr
; /* current attribute */
250 struct list elements
;
251 strval strvalues
[StringValue_Last
];
255 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
260 encoded_buffer utf16
;
261 encoded_buffer encoded
;
263 xmlreaderinput
*input
;
266 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
268 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
271 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
273 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
276 /* reader memory allocation functions */
277 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
279 return m_alloc(reader
->imalloc
, len
);
282 static inline void reader_free(xmlreader
*reader
, void *mem
)
284 m_free(reader
->imalloc
, mem
);
287 /* Just return pointer from offset, no attempt to read more. */
288 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
290 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
291 return (WCHAR
*)buffer
->data
+ offset
;
294 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
296 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
299 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
303 if (src
->str
!= strval_empty
.str
)
305 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
306 if (!dest
->str
) return E_OUTOFMEMORY
;
307 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
308 dest
->str
[dest
->len
] = 0;
315 /* reader input memory allocation functions */
316 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
318 return m_alloc(input
->imalloc
, len
);
321 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
323 return m_realloc(input
->imalloc
, mem
, len
);
326 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
328 m_free(input
->imalloc
, mem
);
331 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
338 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
339 ret
= readerinput_alloc(input
, size
);
340 if (ret
) memcpy(ret
, str
, size
);
346 static void reader_clear_attrs(xmlreader
*reader
)
348 struct attribute
*attr
, *attr2
;
349 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
351 reader_free(reader
, attr
);
353 list_init(&reader
->attrs
);
354 reader
->attr_count
= 0;
358 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
359 while we are on a node with attributes */
360 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
362 struct attribute
*attr
;
364 attr
= reader_alloc(reader
, sizeof(*attr
));
365 if (!attr
) return E_OUTOFMEMORY
;
367 attr
->localname
= *localname
;
368 attr
->value
= *value
;
369 list_add_tail(&reader
->attrs
, &attr
->entry
);
370 reader
->attr_count
++;
375 /* This one frees stored string value if needed */
376 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
378 if (v
->str
!= strval_empty
.str
)
380 reader_free(reader
, v
->str
);
385 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
392 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
394 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
397 /* used to initialize from constant string */
398 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
405 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
407 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
410 static void reader_free_strvalues(xmlreader
*reader
)
413 for (type
= 0; type
< StringValue_Last
; type
++)
414 reader_free_strvalue(reader
, type
);
417 /* This helper should only be used to test if strings are the same,
418 it doesn't try to sort. */
419 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
421 if (str1
->len
!= str2
->len
) return 0;
422 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
425 static void reader_clear_elements(xmlreader
*reader
)
427 struct element
*elem
, *elem2
;
428 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
430 reader_free_strvalued(reader
, &elem
->qname
);
431 reader_free(reader
, elem
);
433 list_init(&reader
->elements
);
434 reader
->empty_element
= FALSE
;
437 static HRESULT
reader_inc_depth(xmlreader
*reader
)
439 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
443 static void reader_dec_depth(xmlreader
*reader
)
445 if (reader
->depth
> 1) reader
->depth
--;
448 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
450 struct element
*elem
;
453 elem
= reader_alloc(reader
, sizeof(*elem
));
454 if (!elem
) return E_OUTOFMEMORY
;
456 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
458 reader_free(reader
, elem
);
462 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
465 reader_free_strvalued(reader
, &elem
->qname
);
466 reader_free(reader
, elem
);
470 if (!list_empty(&reader
->elements
))
472 hr
= reader_inc_depth(reader
);
474 reader_free(reader
, elem
);
479 list_add_head(&reader
->elements
, &elem
->entry
);
480 reader
->empty_element
= FALSE
;
484 static void reader_pop_element(xmlreader
*reader
)
486 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
490 list_remove(&elem
->entry
);
491 reader_free_strvalued(reader
, &elem
->qname
);
492 reader_free_strvalued(reader
, &elem
->localname
);
493 reader_free(reader
, elem
);
494 reader_dec_depth(reader
);
498 /* Always make a copy, because strings are supposed to be null-terminated. A null pointer for 'value'
499 means the node value is yet to be determined. */
500 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
502 strval
*v
= &reader
->strvalues
[type
];
504 reader_free_strvalue(reader
, type
);
513 if (value
->str
== strval_empty
.str
)
517 if (type
== StringValue_Value
)
519 /* defer allocation for value string */
521 v
->start
= value
->start
;
526 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
527 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
528 v
->str
[value
->len
] = 0;
534 static inline int is_reader_pending(xmlreader
*reader
)
536 return reader
->input
->pending
;
539 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
541 const int initial_len
= 0x2000;
542 buffer
->data
= readerinput_alloc(input
, initial_len
);
543 if (!buffer
->data
) return E_OUTOFMEMORY
;
545 memset(buffer
->data
, 0, 4);
547 buffer
->allocated
= initial_len
;
553 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
555 readerinput_free(input
, buffer
->data
);
558 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
560 if (encoding
== XmlEncoding_Unknown
)
562 FIXME("unsupported encoding %d\n", encoding
);
566 *cp
= xml_encoding_map
[encoding
].cp
;
571 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
575 if (!name
) return XmlEncoding_Unknown
;
578 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
585 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
587 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
589 return xml_encoding_map
[n
].enc
;
597 return XmlEncoding_Unknown
;
600 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
602 input_buffer
*buffer
;
605 input
->buffer
= NULL
;
607 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
608 if (!buffer
) return E_OUTOFMEMORY
;
610 buffer
->input
= input
;
611 buffer
->code_page
= ~0; /* code page is unknown at this point */
612 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
614 readerinput_free(input
, buffer
);
618 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
620 free_encoded_buffer(input
, &buffer
->utf16
);
621 readerinput_free(input
, buffer
);
625 input
->buffer
= buffer
;
629 static void free_input_buffer(input_buffer
*buffer
)
631 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
632 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
633 readerinput_free(buffer
->input
, buffer
);
636 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
638 if (readerinput
->stream
) {
639 ISequentialStream_Release(readerinput
->stream
);
640 readerinput
->stream
= NULL
;
644 /* Queries already stored interface for IStream/ISequentialStream.
645 Interface supplied on creation will be overwritten */
646 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
650 readerinput_release_stream(readerinput
);
651 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
653 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
658 /* reads a chunk to raw buffer */
659 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
661 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
662 /* to make sure aligned length won't exceed allocated length */
663 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
667 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
668 variable width encodings like UTF-8 */
669 len
= (len
+ 3) & ~3;
670 /* try to use allocated space or grow */
671 if (buffer
->allocated
- buffer
->written
< len
)
673 buffer
->allocated
*= 2;
674 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
675 len
= buffer
->allocated
- buffer
->written
;
679 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
680 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
681 readerinput
->pending
= hr
== E_PENDING
;
682 if (FAILED(hr
)) return hr
;
683 buffer
->written
+= read
;
688 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
689 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
691 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
693 length
*= sizeof(WCHAR
);
694 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
695 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
697 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
698 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
699 buffer
->allocated
= grown_size
;
703 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
705 static const char startA
[] = {'<','?'};
706 static const char commentA
[] = {'<','!'};
707 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
708 unsigned char *ptr
= (unsigned char*)buffer
->data
;
710 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
711 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
712 /* test start byte */
715 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
716 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
717 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
718 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
722 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
724 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
725 static const WCHAR startW
[] = {'<','?'};
726 static const WCHAR commentW
[] = {'<','!'};
727 static const char utf8bom
[] = {0xef,0xbb,0xbf};
728 static const char utf16lebom
[] = {0xff,0xfe};
730 *enc
= XmlEncoding_Unknown
;
732 if (buffer
->written
<= 3)
734 HRESULT hr
= readerinput_growraw(readerinput
);
735 if (FAILED(hr
)) return hr
;
736 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
739 /* try start symbols if we have enough data to do that, input buffer should contain
740 first chunk already */
741 if (readerinput_is_utf8(readerinput
))
742 *enc
= XmlEncoding_UTF8
;
743 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
744 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
745 *enc
= XmlEncoding_UTF16
;
746 /* try with BOM now */
747 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
749 buffer
->cur
+= sizeof(utf8bom
);
750 *enc
= XmlEncoding_UTF8
;
752 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
754 buffer
->cur
+= sizeof(utf16lebom
);
755 *enc
= XmlEncoding_UTF16
;
761 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
763 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
764 int len
= buffer
->written
;
766 /* complete single byte char */
767 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
769 /* find start byte of multibyte char */
770 while (--len
&& !(buffer
->data
[len
] & 0xc0))
776 /* Returns byte length of complete char sequence for buffer code page,
777 it's relative to current buffer position which is currently used for BOM handling
779 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
781 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
784 if (readerinput
->buffer
->code_page
== CP_UTF8
)
785 len
= readerinput_get_utf8_convlen(readerinput
);
787 len
= buffer
->written
;
789 TRACE("%d\n", len
- buffer
->cur
);
790 return len
- buffer
->cur
;
793 /* It's possible that raw buffer has some leftovers from last conversion - some char
794 sequence that doesn't represent a full code point. Length argument should be calculated with
795 readerinput_get_convlen(), if it's -1 it will be calculated here. */
796 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
798 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
801 len
= readerinput_get_convlen(readerinput
);
803 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
804 /* everything below cur is lost too */
805 buffer
->written
-= len
+ buffer
->cur
;
806 /* after this point we don't need cur offset really,
807 it's used only to mark where actual data begins when first chunk is read */
811 /* note that raw buffer content is kept */
812 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
814 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
815 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
821 hr
= get_code_page(enc
, &cp
);
822 if (FAILED(hr
)) return;
824 readerinput
->buffer
->code_page
= cp
;
825 len
= readerinput_get_convlen(readerinput
);
827 TRACE("switching to cp %d\n", cp
);
829 /* just copy in this case */
830 if (enc
== XmlEncoding_UTF16
)
832 readerinput_grow(readerinput
, len
);
833 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
834 dest
->written
+= len
*sizeof(WCHAR
);
838 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
839 readerinput_grow(readerinput
, dest_len
);
840 ptr
= (WCHAR
*)dest
->data
;
841 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
843 dest
->written
+= dest_len
*sizeof(WCHAR
);
846 /* shrinks parsed data a buffer begins with */
847 static void reader_shrink(xmlreader
*reader
)
849 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
851 /* avoid moving data too often: shrink only once the consumed part exceeds a threshold */
852 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
854 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
855 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
857 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
861 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
862 It won't attempt to shrink but will grow destination buffer if needed */
863 static HRESULT
reader_more(xmlreader
*reader
)
865 xmlreaderinput
*readerinput
= reader
->input
;
866 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
867 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
868 UINT cp
= readerinput
->buffer
->code_page
;
873 /* get some raw data from stream first */
874 hr
= readerinput_growraw(readerinput
);
875 len
= readerinput_get_convlen(readerinput
);
877 /* just copy for UTF-16 case */
880 readerinput_grow(readerinput
, len
);
881 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
882 dest
->written
+= len
*sizeof(WCHAR
);
886 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
887 readerinput_grow(readerinput
, dest_len
);
888 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
889 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
891 dest
->written
+= dest_len
*sizeof(WCHAR
);
892 /* get rid of processed data */
893 readerinput_shrinkraw(readerinput
, len
);
898 static inline UINT
reader_get_cur(xmlreader
*reader
)
900 return reader
->input
->buffer
->utf16
.cur
;
903 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
905 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
906 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
907 if (!*ptr
) reader_more(reader
);
908 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
911 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
914 const WCHAR
*ptr
= reader_get_ptr(reader
);
920 ptr
= reader_get_ptr(reader
);
922 if (str
[i
] != ptr
[i
])
923 return ptr
[i
] - str
[i
];
929 /* moves cursor n WCHARs forward */
930 static void reader_skipn(xmlreader
*reader
, int n
)
932 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
933 const WCHAR
*ptr
= reader_get_ptr(reader
);
935 while (*ptr
++ && n
--)
942 static inline BOOL
is_wchar_space(WCHAR ch
)
944 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
947 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
948 static int reader_skipspaces(xmlreader
*reader
)
950 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
951 const WCHAR
*ptr
= reader_get_ptr(reader
);
952 UINT start
= reader_get_cur(reader
);
954 while (is_wchar_space(*ptr
))
958 else if (*ptr
== '\n')
967 ptr
= reader_get_ptr(reader
);
970 return reader_get_cur(reader
) - start
;
973 /* [26] VersionNum ::= '1.' [0-9]+ */
974 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
976 static const WCHAR onedotW
[] = {'1','.',0};
980 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
982 start
= reader_get_cur(reader
);
984 reader_skipn(reader
, 2);
986 ptr2
= ptr
= reader_get_ptr(reader
);
987 while (*ptr
>= '0' && *ptr
<= '9')
989 reader_skipn(reader
, 1);
990 ptr
= reader_get_ptr(reader
);
993 if (ptr2
== ptr
) return WC_E_DIGIT
;
994 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
995 TRACE("version=%s\n", debug_strval(reader
, val
));
999 /* [25] Eq ::= S? '=' S? */
1000 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1002 static const WCHAR eqW
[] = {'=',0};
1003 reader_skipspaces(reader
);
1004 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1006 reader_skipn(reader
, 1);
1007 reader_skipspaces(reader
);
1011 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1012 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1014 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1018 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1020 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1021 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1022 /* skip 'version' */
1023 reader_skipn(reader
, 7);
1025 hr
= reader_parse_eq(reader
);
1026 if (FAILED(hr
)) return hr
;
1028 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1031 reader_skipn(reader
, 1);
1033 hr
= reader_parse_versionnum(reader
, &val
);
1034 if (FAILED(hr
)) return hr
;
1036 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1040 reader_skipn(reader
, 1);
1042 return reader_add_attr(reader
, &name
, &val
);
1045 /* ([A-Za-z0-9._] | '-') */
1046 static inline BOOL
is_wchar_encname(WCHAR ch
)
1048 return ((ch
>= 'A' && ch
<= 'Z') ||
1049 (ch
>= 'a' && ch
<= 'z') ||
1050 (ch
>= '0' && ch
<= '9') ||
1051 (ch
== '.') || (ch
== '_') ||
1055 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1056 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1058 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1062 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1063 return WC_E_ENCNAME
;
1065 val
->start
= reader_get_cur(reader
);
1068 while (is_wchar_encname(*++ptr
))
1072 enc
= parse_encoding_name(start
, len
);
1073 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1077 if (enc
== XmlEncoding_Unknown
)
1078 return WC_E_ENCNAME
;
1080 /* skip encoding name */
1081 reader_skipn(reader
, len
);
1085 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1086 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1088 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1092 if (!reader_skipspaces(reader
)) return S_FALSE
;
1094 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1095 name
.str
= reader_get_ptr(reader
);
1096 name
.start
= reader_get_cur(reader
);
1098 /* skip 'encoding' */
1099 reader_skipn(reader
, 8);
1101 hr
= reader_parse_eq(reader
);
1102 if (FAILED(hr
)) return hr
;
1104 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1107 reader_skipn(reader
, 1);
1109 hr
= reader_parse_encname(reader
, &val
);
1110 if (FAILED(hr
)) return hr
;
1112 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1116 reader_skipn(reader
, 1);
1118 return reader_add_attr(reader
, &name
, &val
);
1121 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1122 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1124 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1125 static const WCHAR yesW
[] = {'y','e','s',0};
1126 static const WCHAR noW
[] = {'n','o',0};
1131 if (!reader_skipspaces(reader
)) return S_FALSE
;
1133 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1134 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1135 /* skip 'standalone' */
1136 reader_skipn(reader
, 10);
1138 hr
= reader_parse_eq(reader
);
1139 if (FAILED(hr
)) return hr
;
1141 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1144 reader_skipn(reader
, 1);
1146 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1147 return WC_E_XMLDECL
;
1149 start
= reader_get_cur(reader
);
1150 /* skip 'yes'|'no' */
1151 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1152 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1153 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1155 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1158 reader_skipn(reader
, 1);
1160 return reader_add_attr(reader
, &name
, &val
);
1163 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1164 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1166 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1167 static const WCHAR declcloseW
[] = {'?','>',0};
1170 /* check if we have "<?xml " */
1171 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1173 reader_skipn(reader
, 5);
1174 hr
= reader_parse_versioninfo(reader
);
1178 hr
= reader_parse_encdecl(reader
);
1182 hr
= reader_parse_sddecl(reader
);
1186 reader_skipspaces(reader
);
1187 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1188 reader_skipn(reader
, 2);
1190 reader_inc_depth(reader
);
1191 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1192 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1193 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1194 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1199 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1200 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1205 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1207 start
= reader
->resume
[XmlReadResume_Body
];
1208 ptr
= reader_get_ptr(reader
);
1213 reader_skipn(reader
, 4);
1214 reader_shrink(reader
);
1215 ptr
= reader_get_ptr(reader
);
1216 start
= reader_get_cur(reader
);
1217 reader
->nodetype
= XmlNodeType_Comment
;
1218 reader
->resume
[XmlReadResume_Body
] = start
;
1219 reader
->resumestate
= XmlReadResumeState_Comment
;
1220 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1221 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1222 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1225 /* will exit when there's no more data, it won't attempt to
1226 read more from stream */
1237 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1238 TRACE("%s\n", debug_strval(reader
, &value
));
1240 /* skip rest of markup '->' */
1241 reader_skipn(reader
, 3);
1243 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1244 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1245 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1246 reader
->resume
[XmlReadResume_Body
] = 0;
1247 reader
->resumestate
= XmlReadResumeState_Initial
;
1251 return WC_E_COMMENT
;
1255 reader_skipn(reader
, 1);
1262 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1263 static inline BOOL
is_char(WCHAR ch
)
1265 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1266 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1267 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1268 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1269 (ch
>= 0xe000 && ch
<= 0xfffd);
1272 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1273 static inline BOOL
is_pubchar(WCHAR ch
)
1275 return (ch
== ' ') ||
1276 (ch
>= 'a' && ch
<= 'z') ||
1277 (ch
>= 'A' && ch
<= 'Z') ||
1278 (ch
>= '0' && ch
<= '9') ||
1279 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1280 (ch
== '=') || (ch
== '?') ||
1281 (ch
== '@') || (ch
== '!') ||
1282 (ch
>= '#' && ch
<= '%') || /* #$% */
1283 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
/* Tests one UTF-16 code unit against the NameStartChar production ([4]).
 *
 * Returns nonzero when 'ch' may begin a Name.  Surrogate halves are
 * accepted individually, so supplementary-plane characters pass one code
 * unit at a time. */
static inline BOOL is_namestartchar(WCHAR ch)
{
    /* ASCII: ':' | [A-Z] | '_' | [a-z] */
    if (ch < 0x80)
        return (ch == ':') || (ch == '_') ||
               (ch >= 'A' && ch <= 'Z') ||
               (ch >= 'a' && ch <= 'z');

    /* [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF], i.e. [#xC0-#x2FF] with
       #xD7 and #xF7 punched out */
    if (ch <= 0x2ff)
        return (ch >= 0xc0) && (ch != 0xd7) && (ch != 0xf7);

    /* [#x370-#x37D] | [#x37F-#x1FFF], i.e. [#x370-#x1FFF] without #x37E */
    if (ch <= 0x1fff)
        return (ch >= 0x370) && (ch != 0x37e);

    return (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           /* [#x3001-#xD7FF] followed directly by high and low surrogates */
           (ch >= 0x3001 && ch <= 0xdfff) ||
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
/* Tests one UTF-16 code unit against NameChar with the colon excluded.
 *
 * Returns nonzero when 'ch' may appear in an NCName, that is any NameChar
 * ([4a]: NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] |
 * [#x203F-#x2040]) other than ':'.  Surrogate halves are accepted
 * individually. */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0' && ch <= '9') ||
           (ch == 0xb7) ||                 /* required by [4a] NameChar */
           (ch >= 0xc0 && ch <= 0xd6) ||
           (ch >= 0xd8 && ch <= 0xf6) ||
           (ch >= 0xf8 && ch <= 0x2ff) ||
           (ch >= 0x300 && ch <= 0x36f) ||
           (ch >= 0x370 && ch <= 0x37d) ||
           (ch >= 0x37f && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1330 static inline BOOL
is_namechar(WCHAR ch
)
1332 return (ch
== ':') || is_ncnamechar(ch
);
1335 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1337 /* When we're on attribute always return attribute type, container node type is kept.
1338 Note that container is not necessarily an element, and attribute doesn't mean it's
1339 an attribute in XML spec terms. */
1340 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1343 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1344 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1345 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1346 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1347 [5] Name ::= NameStartChar (NameChar)* */
1348 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1353 if (reader
->resume
[XmlReadResume_Name
])
1355 start
= reader
->resume
[XmlReadResume_Name
];
1356 ptr
= reader_get_ptr(reader
);
1360 ptr
= reader_get_ptr(reader
);
1361 start
= reader_get_cur(reader
);
1362 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1365 while (is_namechar(*ptr
))
1367 reader_skipn(reader
, 1);
1368 ptr
= reader_get_ptr(reader
);
1371 if (is_reader_pending(reader
))
1373 reader
->resume
[XmlReadResume_Name
] = start
;
1377 reader
->resume
[XmlReadResume_Name
] = 0;
1379 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1380 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1385 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1386 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1388 static const WCHAR xmlW
[] = {'x','m','l'};
1389 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1395 hr
= reader_parse_name(reader
, &name
);
1396 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1398 /* now that we got name check for illegal content */
1399 if (strval_eq(reader
, &name
, &xmlval
))
1400 return WC_E_LEADINGXML
;
1402 /* PITarget can't be a qualified name */
1403 ptr
= reader_get_strptr(reader
, &name
);
1404 for (i
= 0; i
< name
.len
; i
++)
1406 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1408 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1413 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1414 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1421 switch (reader
->resumestate
)
1423 case XmlReadResumeState_Initial
:
1425 reader_skipn(reader
, 2);
1426 reader_shrink(reader
);
1427 reader
->resumestate
= XmlReadResumeState_PITarget
;
1428 case XmlReadResumeState_PITarget
:
1429 hr
= reader_parse_pitarget(reader
, &target
);
1430 if (FAILED(hr
)) return hr
;
1431 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1432 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1433 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1434 reader
->resumestate
= XmlReadResumeState_PIBody
;
1435 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1440 start
= reader
->resume
[XmlReadResume_Body
];
1441 ptr
= reader_get_ptr(reader
);
1448 UINT cur
= reader_get_cur(reader
);
1451 /* strip all leading whitespace chars */
1454 ptr
= reader_get_ptr2(reader
, start
);
1455 if (!is_wchar_space(*ptr
)) break;
1459 reader_init_strvalue(start
, cur
-start
, &value
);
1462 reader_skipn(reader
, 2);
1463 TRACE("%s\n", debug_strval(reader
, &value
));
1464 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1465 reader
->resumestate
= XmlReadResumeState_Initial
;
1466 reader
->resume
[XmlReadResume_Body
] = 0;
1467 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1472 reader_skipn(reader
, 1);
1473 ptr
= reader_get_ptr(reader
);
1479 /* This one is used to parse significant whitespace nodes, like in Misc production */
1480 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1482 switch (reader
->resumestate
)
1484 case XmlReadResumeState_Initial
:
1485 reader_shrink(reader
);
1486 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1487 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1488 reader
->nodetype
= XmlNodeType_Whitespace
;
1489 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1490 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1491 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1493 case XmlReadResumeState_Whitespace
:
1498 reader_skipspaces(reader
);
1499 if (is_reader_pending(reader
)) return S_OK
;
1501 start
= reader
->resume
[XmlReadResume_Body
];
1502 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1503 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1504 TRACE("%s\n", debug_strval(reader
, &value
));
1505 reader
->resumestate
= XmlReadResumeState_Initial
;
1514 /* [27] Misc ::= Comment | PI | S */
1515 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1517 HRESULT hr
= S_FALSE
;
1519 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1521 hr
= reader_more(reader
);
1522 if (FAILED(hr
)) return hr
;
1524 /* finish current node */
1525 switch (reader
->resumestate
)
1527 case XmlReadResumeState_PITarget
:
1528 case XmlReadResumeState_PIBody
:
1529 return reader_parse_pi(reader
);
1530 case XmlReadResumeState_Comment
:
1531 return reader_parse_comment(reader
);
1532 case XmlReadResumeState_Whitespace
:
1533 return reader_parse_whitespace(reader
);
1535 ERR("unknown resume state %d\n", reader
->resumestate
);
1541 const WCHAR
*cur
= reader_get_ptr(reader
);
1543 if (is_wchar_space(*cur
))
1544 hr
= reader_parse_whitespace(reader
);
1545 else if (!reader_cmp(reader
, commentW
))
1546 hr
= reader_parse_comment(reader
);
1547 else if (!reader_cmp(reader
, piW
))
1548 hr
= reader_parse_pi(reader
);
1552 if (hr
!= S_FALSE
) return hr
;
1558 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1559 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1561 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1564 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1567 reader_skipn(reader
, 1);
1569 cur
= reader_get_ptr(reader
);
1570 start
= reader_get_cur(reader
);
1571 while (is_char(*cur
) && *cur
!= quote
)
1573 reader_skipn(reader
, 1);
1574 cur
= reader_get_ptr(reader
);
1576 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1577 if (*cur
== quote
) reader_skipn(reader
, 1);
1579 TRACE("%s\n", debug_strval(reader
, literal
));
1583 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1584 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1585 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1587 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1590 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1593 reader_skipn(reader
, 1);
1595 start
= reader_get_cur(reader
);
1596 cur
= reader_get_ptr(reader
);
1597 while (is_pubchar(*cur
) && *cur
!= quote
)
1599 reader_skipn(reader
, 1);
1600 cur
= reader_get_ptr(reader
);
1603 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1604 TRACE("%s\n", debug_strval(reader
, literal
));
1608 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1609 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1611 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1612 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1617 if (reader_cmp(reader
, systemW
))
1619 if (reader_cmp(reader
, publicW
))
1626 reader_skipn(reader
, 6);
1627 cnt
= reader_skipspaces(reader
);
1628 if (!cnt
) return WC_E_WHITESPACE
;
1630 hr
= reader_parse_pub_literal(reader
, &pub
);
1631 if (FAILED(hr
)) return hr
;
1633 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1634 return reader_add_attr(reader
, &name
, &pub
);
1642 reader_skipn(reader
, 6);
1643 cnt
= reader_skipspaces(reader
);
1644 if (!cnt
) return WC_E_WHITESPACE
;
1646 hr
= reader_parse_sys_literal(reader
, &sys
);
1647 if (FAILED(hr
)) return hr
;
1649 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1650 return reader_add_attr(reader
, &name
, &sys
);
1656 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1657 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1659 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1664 /* check if we have "<!DOCTYPE" */
1665 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1666 reader_shrink(reader
);
1668 /* DTD processing is not allowed by default */
1669 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1671 reader_skipn(reader
, 9);
1672 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1675 hr
= reader_parse_name(reader
, &name
);
1676 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1678 reader_skipspaces(reader
);
1680 hr
= reader_parse_externalid(reader
);
1681 if (FAILED(hr
)) return hr
;
1683 reader_skipspaces(reader
);
1685 cur
= reader_get_ptr(reader
);
1688 FIXME("internal subset parsing not implemented\n");
1693 reader_skipn(reader
, 1);
1695 reader
->nodetype
= XmlNodeType_DocumentType
;
1696 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1697 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1702 /* [11 NS] LocalPart ::= NCName */
1703 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1708 if (reader
->resume
[XmlReadResume_Local
])
1710 start
= reader
->resume
[XmlReadResume_Local
];
1711 ptr
= reader_get_ptr(reader
);
1715 ptr
= reader_get_ptr(reader
);
1716 start
= reader_get_cur(reader
);
1719 while (is_ncnamechar(*ptr
))
1721 reader_skipn(reader
, 1);
1722 ptr
= reader_get_ptr(reader
);
1725 if (is_reader_pending(reader
))
1727 reader
->resume
[XmlReadResume_Local
] = start
;
1731 reader
->resume
[XmlReadResume_Local
] = 0;
1733 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1738 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1739 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1740 [9 NS] UnprefixedName ::= LocalPart
1741 [10 NS] Prefix ::= NCName */
1742 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1748 if (reader
->resume
[XmlReadResume_Name
])
1750 start
= reader
->resume
[XmlReadResume_Name
];
1751 ptr
= reader_get_ptr(reader
);
1755 ptr
= reader_get_ptr(reader
);
1756 start
= reader_get_cur(reader
);
1757 reader
->resume
[XmlReadResume_Name
] = start
;
1758 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1761 if (reader
->resume
[XmlReadResume_Local
])
1763 hr
= reader_parse_local(reader
, local
);
1764 if (FAILED(hr
)) return hr
;
1766 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1767 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1772 /* skip prefix part */
1773 while (is_ncnamechar(*ptr
))
1775 reader_skipn(reader
, 1);
1776 ptr
= reader_get_ptr(reader
);
1779 if (is_reader_pending(reader
)) return E_PENDING
;
1781 /* got a qualified name */
1784 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1787 reader_skipn(reader
, 1);
1788 hr
= reader_parse_local(reader
, local
);
1789 if (FAILED(hr
)) return hr
;
1793 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1794 reader_init_strvalue(0, 0, prefix
);
1798 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1801 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1803 TRACE("ncname %s\n", debug_strval(reader
, local
));
1805 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1807 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1810 reader
->resume
[XmlReadResume_Name
] = 0;
1811 reader
->resume
[XmlReadResume_Local
] = 0;
1816 /* Applies normalization rules to a single char, used for attribute values.
1818 Rules include 2 steps:
1820 1) replacing \r\n with a single \n;
1821 2) replacing all whitespace chars with ' '.
1824 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1826 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1828 if (!is_wchar_space(*ptr
)) return;
1830 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1832 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1833 memmove(ptr
+1, ptr
+2, len
);
1838 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1840 static const WCHAR entltW
[] = {'l','t'};
1841 static const WCHAR entgtW
[] = {'g','t'};
1842 static const WCHAR entampW
[] = {'a','m','p'};
1843 static const WCHAR entaposW
[] = {'a','p','o','s'};
1844 static const WCHAR entquotW
[] = {'q','u','o','t'};
1845 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1846 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1847 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1848 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1849 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1850 WCHAR
*str
= reader_get_strptr(reader
, name
);
1855 if (strval_eq(reader
, name
, <
)) return '<';
1858 if (strval_eq(reader
, name
, >
)) return '>';
1861 if (strval_eq(reader
, name
, &
))
1863 else if (strval_eq(reader
, name
, &apos
))
1867 if (strval_eq(reader
, name
, "
)) return '\"';
1876 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1877 [67] Reference ::= EntityRef | CharRef
1878 [68] EntityRef ::= '&' Name ';' */
1879 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1881 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1882 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1883 UINT cur
= reader_get_cur(reader
);
1888 reader_skipn(reader
, 1);
1889 ptr
= reader_get_ptr(reader
);
1893 reader_skipn(reader
, 1);
1894 ptr
= reader_get_ptr(reader
);
1896 /* hex char or decimal */
1899 reader_skipn(reader
, 1);
1900 ptr
= reader_get_ptr(reader
);
1904 if ((*ptr
>= '0' && *ptr
<= '9'))
1905 ch
= ch
*16 + *ptr
- '0';
1906 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1907 ch
= ch
*16 + *ptr
- 'a' + 10;
1908 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1909 ch
= ch
*16 + *ptr
- 'A' + 10;
1911 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1912 reader_skipn(reader
, 1);
1913 ptr
= reader_get_ptr(reader
);
1920 if ((*ptr
>= '0' && *ptr
<= '9'))
1922 ch
= ch
*10 + *ptr
- '0';
1923 reader_skipn(reader
, 1);
1924 ptr
= reader_get_ptr(reader
);
1927 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1931 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1934 if (is_wchar_space(ch
)) ch
= ' ';
1936 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1937 memmove(start
+1, ptr
+1, len
);
1938 buffer
->cur
= cur
+ 1;
1947 hr
= reader_parse_name(reader
, &name
);
1948 if (FAILED(hr
)) return hr
;
1950 ptr
= reader_get_ptr(reader
);
1951 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1953 /* predefined entities resolve to a single character */
1954 ch
= get_predefined_entity(reader
, &name
);
1957 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1958 memmove(start
+1, ptr
+1, len
);
1959 buffer
->cur
= cur
+ 1;
1965 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1966 return WC_E_UNDECLAREDENTITY
;
1974 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1975 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1980 ptr
= reader_get_ptr(reader
);
1982 /* skip opening quote */
1984 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1985 reader_skipn(reader
, 1);
1987 ptr
= reader_get_ptr(reader
);
1988 start
= reader_get_cur(reader
);
1991 if (*ptr
== '<') return WC_E_LESSTHAN
;
1995 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
1996 /* skip closing quote */
1997 reader_skipn(reader
, 1);
2003 HRESULT hr
= reader_parse_reference(reader
);
2004 if (FAILED(hr
)) return hr
;
2008 reader_normalize_space(reader
, ptr
);
2009 reader_skipn(reader
, 1);
2011 ptr
= reader_get_ptr(reader
);
2017 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2018 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2019 [3 NS] DefaultAttName ::= 'xmlns'
2020 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2021 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2023 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
2024 strval prefix
, local
, qname
, xmlns
, value
;
2027 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2028 if (FAILED(hr
)) return hr
;
2030 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2032 if (strval_eq(reader
, &prefix
, &xmlns
))
2034 FIXME("namespace definitions not supported\n");
2038 if (strval_eq(reader
, &qname
, &xmlns
))
2039 FIXME("default namespace definitions not supported\n");
2041 hr
= reader_parse_eq(reader
);
2042 if (FAILED(hr
)) return hr
;
2044 hr
= reader_parse_attvalue(reader
, &value
);
2045 if (FAILED(hr
)) return hr
;
2047 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2048 return reader_add_attr(reader
, &local
, &value
);
2051 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2052 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2053 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2057 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2058 if (FAILED(hr
)) return hr
;
2062 static const WCHAR endW
[] = {'/','>',0};
2064 reader_skipspaces(reader
);
2067 if ((*empty
= !reader_cmp(reader
, endW
)))
2070 reader_skipn(reader
, 2);
2071 reader
->empty_element
= TRUE
;
2075 /* got a start tag */
2076 if (!reader_cmp(reader
, gtW
))
2079 reader_skipn(reader
, 1);
2080 return reader_push_element(reader
, qname
, local
);
2083 hr
= reader_parse_attribute(reader
);
2084 if (FAILED(hr
)) return hr
;
2090 /* [39] element ::= EmptyElemTag | STag content ETag */
2091 static HRESULT
reader_parse_element(xmlreader
*reader
)
2095 switch (reader
->resumestate
)
2097 case XmlReadResumeState_Initial
:
2098 /* check if we are really on element */
2099 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2102 reader_skipn(reader
, 1);
2104 reader_shrink(reader
);
2105 reader
->resumestate
= XmlReadResumeState_STag
;
2106 case XmlReadResumeState_STag
:
2108 strval qname
, prefix
, local
;
2111 /* this handles empty elements too */
2112 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2113 if (FAILED(hr
)) return hr
;
2115 /* FIXME: need to check for defined namespace to reject invalid prefix,
2116 currently reject all prefixes */
2117 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2119 /* if we got empty element and stack is empty go straight to Misc */
2120 if (empty
&& list_empty(&reader
->elements
))
2121 reader
->instate
= XmlReadInState_MiscEnd
;
2123 reader
->instate
= XmlReadInState_Content
;
2125 reader
->nodetype
= XmlNodeType_Element
;
2126 reader
->resumestate
= XmlReadResumeState_Initial
;
2127 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2128 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2129 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2139 /* [13 NS] ETag ::= '</' QName S? '>' */
2140 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2142 strval prefix
, local
, qname
;
2143 struct element
*elem
;
2147 reader_skipn(reader
, 2);
2149 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2150 if (FAILED(hr
)) return hr
;
2152 reader_skipspaces(reader
);
2154 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2157 reader_skipn(reader
, 1);
2159 /* Element stack should never be empty at this point, cause we shouldn't get to
2160 content parsing if it's empty. */
2161 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2162 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2164 reader_pop_element(reader
);
2166 /* It was a root element, the rest is expected as Misc */
2167 if (list_empty(&reader
->elements
))
2168 reader
->instate
= XmlReadInState_MiscEnd
;
2170 reader
->nodetype
= XmlNodeType_EndElement
;
2171 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2172 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2177 /* [18] CDSect ::= CDStart CData CDEnd
2178 [19] CDStart ::= '<![CDATA['
2179 [20] CData ::= (Char* - (Char* ']]>' Char*))
2180 [21] CDEnd ::= ']]>' */
2181 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2186 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2188 start
= reader
->resume
[XmlReadResume_Body
];
2189 ptr
= reader_get_ptr(reader
);
2193 /* skip markup '<![CDATA[' */
2194 reader_skipn(reader
, 9);
2195 reader_shrink(reader
);
2196 ptr
= reader_get_ptr(reader
);
2197 start
= reader_get_cur(reader
);
2198 reader
->nodetype
= XmlNodeType_CDATA
;
2199 reader
->resume
[XmlReadResume_Body
] = start
;
2200 reader
->resumestate
= XmlReadResumeState_CDATA
;
2201 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2202 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2203 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2208 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2212 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2215 reader_skipn(reader
, 3);
2216 TRACE("%s\n", debug_strval(reader
, &value
));
2218 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2219 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2220 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2221 reader
->resume
[XmlReadResume_Body
] = 0;
2222 reader
->resumestate
= XmlReadResumeState_Initial
;
2227 /* Value normalization is not fully implemented, rules are:
2229 - single '\r' -> '\n';
2230 - sequence '\r\n' -> '\n', in this case value length changes;
2232 if (*ptr
== '\r') *ptr
= '\n';
2233 reader_skipn(reader
, 1);
2241 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2242 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2247 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2249 start
= reader
->resume
[XmlReadResume_Body
];
2250 ptr
= reader_get_ptr(reader
);
2254 reader_shrink(reader
);
2255 ptr
= reader_get_ptr(reader
);
2256 start
= reader_get_cur(reader
);
2257 /* There's no text */
2258 if (!*ptr
|| *ptr
== '<') return S_OK
;
2259 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2260 reader
->resume
[XmlReadResume_Body
] = start
;
2261 reader
->resumestate
= XmlReadResumeState_CharData
;
2262 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2263 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2264 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2269 /* CDATA closing sequence ']]>' is not allowed */
2270 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2271 return WC_E_CDSECTEND
;
2273 /* Found next markup part */
2278 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2279 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2280 reader
->resume
[XmlReadResume_Body
] = 0;
2281 reader
->resumestate
= XmlReadResumeState_Initial
;
2285 reader_skipn(reader
, 1);
2287 /* this covers a case when text has leading whitespace chars */
2288 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2295 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2296 static HRESULT
reader_parse_content(xmlreader
*reader
)
2298 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2299 static const WCHAR etagW
[] = {'<','/',0};
2300 static const WCHAR ampW
[] = {'&',0};
2302 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2304 switch (reader
->resumestate
)
2306 case XmlReadResumeState_CDATA
:
2307 return reader_parse_cdata(reader
);
2308 case XmlReadResumeState_Comment
:
2309 return reader_parse_comment(reader
);
2310 case XmlReadResumeState_PIBody
:
2311 case XmlReadResumeState_PITarget
:
2312 return reader_parse_pi(reader
);
2313 case XmlReadResumeState_CharData
:
2314 return reader_parse_chardata(reader
);
2316 ERR("unknown resume state %d\n", reader
->resumestate
);
2320 reader_shrink(reader
);
2322 /* handle end tag here, it indicates end of content as well */
2323 if (!reader_cmp(reader
, etagW
))
2324 return reader_parse_endtag(reader
);
2326 if (!reader_cmp(reader
, commentW
))
2327 return reader_parse_comment(reader
);
2329 if (!reader_cmp(reader
, piW
))
2330 return reader_parse_pi(reader
);
2332 if (!reader_cmp(reader
, cdstartW
))
2333 return reader_parse_cdata(reader
);
2335 if (!reader_cmp(reader
, ampW
))
2336 return reader_parse_reference(reader
);
2338 if (!reader_cmp(reader
, ltW
))
2339 return reader_parse_element(reader
);
2341 /* what's left must be CharData */
2342 return reader_parse_chardata(reader
);
2345 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2349 if (!is_reader_pending(reader
))
2350 reader_clear_attrs(reader
);
2354 switch (reader
->instate
)
2356 /* if it's a first call for a new input we need to detect stream encoding */
2357 case XmlReadInState_Initial
:
2361 hr
= readerinput_growraw(reader
->input
);
2362 if (FAILED(hr
)) return hr
;
2364 /* try to detect encoding by BOM or data and set input code page */
2365 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2366 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2367 if (FAILED(hr
)) return hr
;
2369 /* always switch first time cause we have to put something in */
2370 readerinput_switchencoding(reader
->input
, enc
);
2372 /* parse xml declaration */
2373 hr
= reader_parse_xmldecl(reader
);
2374 if (FAILED(hr
)) return hr
;
2376 readerinput_shrinkraw(reader
->input
, -1);
2377 reader
->instate
= XmlReadInState_Misc_DTD
;
2378 if (hr
== S_OK
) return hr
;
2381 case XmlReadInState_Misc_DTD
:
2382 hr
= reader_parse_misc(reader
);
2383 if (FAILED(hr
)) return hr
;
2386 reader
->instate
= XmlReadInState_DTD
;
2390 case XmlReadInState_DTD
:
2391 hr
= reader_parse_dtd(reader
);
2392 if (FAILED(hr
)) return hr
;
2396 reader
->instate
= XmlReadInState_DTD_Misc
;
2400 reader
->instate
= XmlReadInState_Element
;
2402 case XmlReadInState_DTD_Misc
:
2403 hr
= reader_parse_misc(reader
);
2404 if (FAILED(hr
)) return hr
;
2407 reader
->instate
= XmlReadInState_Element
;
2411 case XmlReadInState_Element
:
2412 return reader_parse_element(reader
);
2413 case XmlReadInState_Content
:
2414 return reader_parse_content(reader
);
2415 case XmlReadInState_MiscEnd
:
2416 hr
= reader_parse_misc(reader
);
2417 if (FAILED(hr
)) return hr
;
2420 reader
->instate
= XmlReadInState_Eof
;
2422 case XmlReadInState_Eof
:
2425 FIXME("internal state %d not handled\n", reader
->instate
);
2433 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2435 xmlreader
*This
= impl_from_IXmlReader(iface
);
2437 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2439 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2440 IsEqualGUID(riid
, &IID_IXmlReader
))
2446 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2448 return E_NOINTERFACE
;
2451 IXmlReader_AddRef(iface
);
2456 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2458 xmlreader
*This
= impl_from_IXmlReader(iface
);
2459 ULONG ref
= InterlockedIncrement(&This
->ref
);
2460 TRACE("(%p)->(%d)\n", This
, ref
);
2464 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2466 xmlreader
*This
= impl_from_IXmlReader(iface
);
2467 LONG ref
= InterlockedDecrement(&This
->ref
);
2469 TRACE("(%p)->(%d)\n", This
, ref
);
2473 IMalloc
*imalloc
= This
->imalloc
;
2474 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2475 reader_clear_attrs(This
);
2476 reader_clear_elements(This
);
2477 reader_free_strvalues(This
);
2478 reader_free(This
, This
);
2479 if (imalloc
) IMalloc_Release(imalloc
);
2485 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2487 xmlreader
*This
= impl_from_IXmlReader(iface
);
2488 IXmlReaderInput
*readerinput
;
2491 TRACE("(%p)->(%p)\n", This
, input
);
2495 readerinput_release_stream(This
->input
);
2496 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2500 This
->line
= This
->pos
= 0;
2501 reader_clear_elements(This
);
2503 This
->resumestate
= XmlReadResumeState_Initial
;
2504 memset(This
->resume
, 0, sizeof(This
->resume
));
2506 /* just reset current input */
2509 This
->state
= XmlReadState_Initial
;
2513 /* now try IXmlReaderInput, ISequentialStream, IStream */
2514 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2517 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2518 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2521 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2522 readerinput
, readerinput
->lpVtbl
);
2523 IUnknown_Release(readerinput
);
2529 if (hr
!= S_OK
|| !readerinput
)
2531 /* create IXmlReaderInput basing on supplied interface */
2532 hr
= CreateXmlReaderInputWithEncodingName(input
,
2533 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2534 if (hr
!= S_OK
) return hr
;
2535 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2538 /* set stream for supplied IXmlReaderInput */
2539 hr
= readerinput_query_for_stream(This
->input
);
2542 This
->state
= XmlReadState_Initial
;
2543 This
->instate
= XmlReadInState_Initial
;
2549 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2551 xmlreader
*This
= impl_from_IXmlReader(iface
);
2553 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2555 if (!value
) return E_INVALIDARG
;
2559 case XmlReaderProperty_DtdProcessing
:
2560 *value
= This
->dtdmode
;
2562 case XmlReaderProperty_ReadState
:
2563 *value
= This
->state
;
2566 FIXME("Unimplemented property (%u)\n", property
);
2573 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2575 xmlreader
*This
= impl_from_IXmlReader(iface
);
2577 TRACE("(%p)->(%s %lu)\n", This
, debugstr_reader_prop(property
), value
);
2581 case XmlReaderProperty_DtdProcessing
:
2582 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2583 This
->dtdmode
= value
;
2586 FIXME("Unimplemented property (%u)\n", property
);
2593 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2595 xmlreader
*This
= impl_from_IXmlReader(iface
);
2596 XmlNodeType oldtype
= This
->nodetype
;
2599 TRACE("(%p)->(%p)\n", This
, nodetype
);
2601 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2603 hr
= reader_parse_nextnode(This
);
2604 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2605 This
->state
= XmlReadState_Interactive
;
2608 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2609 *nodetype
= This
->nodetype
;
2615 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2617 xmlreader
*This
= impl_from_IXmlReader(iface
);
2618 TRACE("(%p)->(%p)\n", This
, node_type
);
2620 *node_type
= reader_get_nodetype(This
);
2621 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2624 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2626 xmlreader
*This
= impl_from_IXmlReader(iface
);
2628 TRACE("(%p)\n", This
);
2630 if (!This
->attr_count
) return S_FALSE
;
2631 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2632 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2633 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2638 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2640 xmlreader
*This
= impl_from_IXmlReader(iface
);
2641 const struct list
*next
;
2643 TRACE("(%p)\n", This
);
2645 if (!This
->attr_count
) return S_FALSE
;
2648 return IXmlReader_MoveToFirstAttribute(iface
);
2650 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2653 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2654 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2655 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2658 return next
? S_OK
: S_FALSE
;
2661 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2663 LPCWSTR namespaceUri
)
2665 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2669 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2671 xmlreader
*This
= impl_from_IXmlReader(iface
);
2672 struct element
*elem
;
2674 TRACE("(%p)\n", This
);
2676 if (!This
->attr_count
) return S_FALSE
;
2679 /* FIXME: support other node types with 'attributes' like DTD */
2680 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2683 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2684 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2690 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2692 xmlreader
*This
= impl_from_IXmlReader(iface
);
2694 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2695 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2696 if (len
) *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2700 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2701 LPCWSTR
*namespaceUri
,
2702 UINT
*namespaceUri_length
)
2704 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2708 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2710 xmlreader
*This
= impl_from_IXmlReader(iface
);
2712 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2713 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2714 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2718 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2720 xmlreader
*This
= impl_from_IXmlReader(iface
);
2722 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2723 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2724 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
2728 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2730 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2731 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2733 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2737 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2742 hr
= IXmlReader_Read(iface
, &type
);
2743 if (FAILED(hr
)) return hr
;
2745 /* return if still pending, partially read values are not reported */
2746 if (is_reader_pending(reader
)) return E_PENDING
;
2751 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2752 if (!ptr
) return E_OUTOFMEMORY
;
2753 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
2759 if (len
) *len
= val
->len
;
2763 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2765 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2766 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2769 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2771 /* Value is already allocated, chunked reads are not possible. */
2772 if (val
->str
) return S_FALSE
;
2776 len
= min(chunk_size
, val
->len
);
2777 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2780 if (read
) *read
= len
;
2786 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2788 UINT
*baseUri_length
)
2790 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
2794 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2796 FIXME("(%p): stub\n", iface
);
2800 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2802 xmlreader
*This
= impl_from_IXmlReader(iface
);
2803 TRACE("(%p)\n", This
);
2804 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2805 when current node is start tag of an element */
2806 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2809 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2811 xmlreader
*This
= impl_from_IXmlReader(iface
);
2813 TRACE("(%p %p)\n", This
, lineNumber
);
2815 if (!lineNumber
) return E_INVALIDARG
;
2817 *lineNumber
= This
->line
;
2822 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2824 xmlreader
*This
= impl_from_IXmlReader(iface
);
2826 TRACE("(%p %p)\n", This
, linePosition
);
2828 if (!linePosition
) return E_INVALIDARG
;
2830 *linePosition
= This
->pos
;
2835 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2837 xmlreader
*This
= impl_from_IXmlReader(iface
);
2839 TRACE("(%p)->(%p)\n", This
, count
);
2841 if (!count
) return E_INVALIDARG
;
2843 *count
= This
->attr_count
;
2847 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2849 xmlreader
*This
= impl_from_IXmlReader(iface
);
2850 TRACE("(%p)->(%p)\n", This
, depth
);
2851 *depth
= This
->depth
;
2855 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2857 FIXME("(%p): stub\n", iface
);
2861 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2863 xmlreader_QueryInterface
,
2867 xmlreader_GetProperty
,
2868 xmlreader_SetProperty
,
2870 xmlreader_GetNodeType
,
2871 xmlreader_MoveToFirstAttribute
,
2872 xmlreader_MoveToNextAttribute
,
2873 xmlreader_MoveToAttributeByName
,
2874 xmlreader_MoveToElement
,
2875 xmlreader_GetQualifiedName
,
2876 xmlreader_GetNamespaceUri
,
2877 xmlreader_GetLocalName
,
2878 xmlreader_GetPrefix
,
2880 xmlreader_ReadValueChunk
,
2881 xmlreader_GetBaseUri
,
2882 xmlreader_IsDefault
,
2883 xmlreader_IsEmptyElement
,
2884 xmlreader_GetLineNumber
,
2885 xmlreader_GetLinePosition
,
2886 xmlreader_GetAttributeCount
,
2891 /** IXmlReaderInput **/
2892 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2894 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2896 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2898 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2899 IsEqualGUID(riid
, &IID_IUnknown
))
2905 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2907 return E_NOINTERFACE
;
2910 IUnknown_AddRef(iface
);
2915 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2917 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2918 ULONG ref
= InterlockedIncrement(&This
->ref
);
2919 TRACE("(%p)->(%d)\n", This
, ref
);
2923 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2925 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2926 LONG ref
= InterlockedDecrement(&This
->ref
);
2928 TRACE("(%p)->(%d)\n", This
, ref
);
2932 IMalloc
*imalloc
= This
->imalloc
;
2933 if (This
->input
) IUnknown_Release(This
->input
);
2934 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2935 if (This
->buffer
) free_input_buffer(This
->buffer
);
2936 readerinput_free(This
, This
->baseuri
);
2937 readerinput_free(This
, This
);
2938 if (imalloc
) IMalloc_Release(imalloc
);
2944 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2946 xmlreaderinput_QueryInterface
,
2947 xmlreaderinput_AddRef
,
2948 xmlreaderinput_Release
2951 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2956 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2958 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2960 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2965 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2967 reader
= heap_alloc(sizeof(*reader
));
2968 if(!reader
) return E_OUTOFMEMORY
;
2970 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2972 reader
->input
= NULL
;
2973 reader
->state
= XmlReadState_Closed
;
2974 reader
->instate
= XmlReadInState_Initial
;
2975 reader
->resumestate
= XmlReadResumeState_Initial
;
2976 reader
->dtdmode
= DtdProcessing_Prohibit
;
2977 reader
->line
= reader
->pos
= 0;
2978 reader
->imalloc
= imalloc
;
2979 if (imalloc
) IMalloc_AddRef(imalloc
);
2980 reader
->nodetype
= XmlNodeType_None
;
2981 list_init(&reader
->attrs
);
2982 reader
->attr_count
= 0;
2983 reader
->attr
= NULL
;
2984 list_init(&reader
->elements
);
2986 reader
->max_depth
= 256;
2987 reader
->empty_element
= FALSE
;
2988 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2990 for (i
= 0; i
< StringValue_Last
; i
++)
2991 reader
->strvalues
[i
] = strval_empty
;
2993 *obj
= &reader
->IXmlReader_iface
;
2995 TRACE("returning iface %p\n", *obj
);
3000 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3005 IXmlReaderInput
**ppInput
)
3007 xmlreaderinput
*readerinput
;
3010 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3011 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3013 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3016 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3018 readerinput
= heap_alloc(sizeof(*readerinput
));
3019 if(!readerinput
) return E_OUTOFMEMORY
;
3021 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3022 readerinput
->ref
= 1;
3023 readerinput
->imalloc
= imalloc
;
3024 readerinput
->stream
= NULL
;
3025 if (imalloc
) IMalloc_AddRef(imalloc
);
3026 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3027 readerinput
->hint
= hint
;
3028 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3029 readerinput
->pending
= 0;
3031 hr
= alloc_input_buffer(readerinput
);
3034 readerinput_free(readerinput
, readerinput
->baseuri
);
3035 readerinput_free(readerinput
, readerinput
);
3036 if (imalloc
) IMalloc_Release(imalloc
);
3039 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3041 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3043 TRACE("returning iface %p\n", *ppInput
);