2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "xmllite_private.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
34 #include "wine/unicode.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
50 XmlReadInState_Initial
,
51 XmlReadInState_XmlDecl
,
52 XmlReadInState_Misc_DTD
,
54 XmlReadInState_DTD_Misc
,
55 XmlReadInState_Element
,
56 XmlReadInState_Content
,
57 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
59 } XmlReaderInternalState
;
61 /* This state denotes where parsing was interrupted by input problem.
62 Reader resumes parsing using this information. */
65 XmlReadResumeState_Initial
,
66 XmlReadResumeState_PITarget
,
67 XmlReadResumeState_PIBody
,
68 XmlReadResumeState_CDATA
,
69 XmlReadResumeState_Comment
,
70 XmlReadResumeState_STag
,
71 XmlReadResumeState_CharData
72 } XmlReaderResumeState
;
74 /* saved pointer index to resume from particular input position */
77 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
78 XmlReadResume_Local
, /* local for QName */
79 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
85 StringValue_LocalName
,
87 StringValue_QualifiedName
,
90 } XmlReaderStringValue
;
92 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
93 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
95 static const WCHAR dblquoteW
[] = {'\"',0};
96 static const WCHAR quoteW
[] = {'\'',0};
97 static const WCHAR ltW
[] = {'<',0};
98 static const WCHAR gtW
[] = {'>',0};
99 static const WCHAR commentW
[] = {'<','!','-','-',0};
100 static const WCHAR piW
[] = {'<','?',0};
102 static const char *debugstr_nodetype(XmlNodeType nodetype
)
104 static const char* type_names
[] =
113 "ProcessingInstruction",
126 if (nodetype
> _XmlNodeType_Last
)
129 sprintf(buf
, "unknown type=%d", nodetype
);
132 return type_names
[nodetype
];
135 static const char *debugstr_prop(XmlReaderProperty prop
)
137 static const char* prop_names
[] =
149 if (prop
> _XmlReaderProperty_Last
)
152 sprintf(buf
, "unknown property=%d", prop
);
155 return prop_names
[prop
];
158 struct xml_encoding_data
165 static const struct xml_encoding_data xml_encoding_map
[] = {
166 { utf16W
, XmlEncoding_UTF16
, ~0 },
167 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
174 unsigned int allocated
;
175 unsigned int written
;
178 typedef struct input_buffer input_buffer
;
182 IXmlReaderInput IXmlReaderInput_iface
;
184 /* reference passed on IXmlReaderInput creation, is kept when input is created */
187 xml_encoding encoding
;
190 /* stream reference set after SetInput() call from reader,
191 stored as a sequential stream, because currently
192 optimizations possible with IStream aren't implemented */
193 ISequentialStream
*stream
;
194 input_buffer
*buffer
;
195 unsigned int pending
: 1;
198 static const struct IUnknownVtbl xmlreaderinputvtbl
;
200 /* Structure to hold parsed string of specific length.
202 Reader stores node value as 'start' pointer, on request
203 a null-terminated version of it is allocated.
205 To init a strval variable use reader_init_strval(),
206 to set strval as a reader value use reader_set_strval().
210 WCHAR
*str
; /* allocated null-terminated string */
211 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
212 UINT start
; /* input position where value starts */
215 static WCHAR emptyW
[] = {0};
216 static const strval strval_empty
= { emptyW
};
234 IXmlReader IXmlReader_iface
;
236 xmlreaderinput
*input
;
239 XmlReaderInternalState instate
;
240 XmlReaderResumeState resumestate
;
241 XmlNodeType nodetype
;
242 DtdProcessing dtdmode
;
243 UINT line
, pos
; /* reader position in XML stream */
244 struct list attrs
; /* attributes list for current node */
245 struct attribute
*attr
; /* current attribute */
247 struct list elements
;
248 strval strvalues
[StringValue_Last
];
252 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
257 encoded_buffer utf16
;
258 encoded_buffer encoded
;
260 xmlreaderinput
*input
;
263 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
265 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
268 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
270 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
273 static inline void *m_alloc(IMalloc
*imalloc
, size_t len
)
276 return IMalloc_Alloc(imalloc
, len
);
278 return heap_alloc(len
);
281 static inline void *m_realloc(IMalloc
*imalloc
, void *mem
, size_t len
)
284 return IMalloc_Realloc(imalloc
, mem
, len
);
286 return heap_realloc(mem
, len
);
289 static inline void m_free(IMalloc
*imalloc
, void *mem
)
292 IMalloc_Free(imalloc
, mem
);
297 /* reader memory allocation functions */
298 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
300 return m_alloc(reader
->imalloc
, len
);
303 static inline void reader_free(xmlreader
*reader
, void *mem
)
305 m_free(reader
->imalloc
, mem
);
308 /* Just return pointer from offset, no attempt to read more. */
309 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
311 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
312 return (WCHAR
*)buffer
->data
+ offset
;
315 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
317 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
320 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
324 if (src
->str
!= strval_empty
.str
)
326 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
327 if (!dest
->str
) return E_OUTOFMEMORY
;
328 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
329 dest
->str
[dest
->len
] = 0;
336 /* reader input memory allocation functions */
337 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
339 return m_alloc(input
->imalloc
, len
);
342 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
344 return m_realloc(input
->imalloc
, mem
, len
);
347 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
349 m_free(input
->imalloc
, mem
);
352 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
359 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
360 ret
= readerinput_alloc(input
, size
);
361 if (ret
) memcpy(ret
, str
, size
);
367 static void reader_clear_attrs(xmlreader
*reader
)
369 struct attribute
*attr
, *attr2
;
370 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
372 reader_free(reader
, attr
);
374 list_init(&reader
->attrs
);
375 reader
->attr_count
= 0;
378 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
379 while we are on a node with attributes */
380 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
382 struct attribute
*attr
;
384 attr
= reader_alloc(reader
, sizeof(*attr
));
385 if (!attr
) return E_OUTOFMEMORY
;
387 attr
->localname
= *localname
;
388 attr
->value
= *value
;
389 list_add_tail(&reader
->attrs
, &attr
->entry
);
390 reader
->attr_count
++;
395 /* This one frees stored string value if needed */
396 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
398 if (v
->str
!= strval_empty
.str
)
400 reader_free(reader
, v
->str
);
405 /* returns length in WCHARs from 'start' to current buffer offset */
406 static inline UINT
reader_get_len(const xmlreader
*reader
, UINT start
)
408 return reader
->input
->buffer
->utf16
.cur
- start
;
411 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
418 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
420 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
423 /* used to initialize from constant string */
424 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
431 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
433 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
436 static void reader_free_strvalues(xmlreader
*reader
)
439 for (type
= 0; type
< StringValue_Last
; type
++)
440 reader_free_strvalue(reader
, type
);
443 /* This helper should only be used to test if strings are the same,
444 it doesn't try to sort. */
445 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
447 if (str1
->len
!= str2
->len
) return 0;
448 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
451 static void reader_clear_elements(xmlreader
*reader
)
453 struct element
*elem
, *elem2
;
454 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
456 reader_free_strvalued(reader
, &elem
->qname
);
457 reader_free(reader
, elem
);
459 list_init(&reader
->elements
);
460 reader
->empty_element
= FALSE
;
463 static HRESULT
reader_inc_depth(xmlreader
*reader
)
465 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
469 static void reader_dec_depth(xmlreader
*reader
)
471 if (reader
->depth
> 1) reader
->depth
--;
474 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
476 struct element
*elem
;
479 elem
= reader_alloc(reader
, sizeof(*elem
));
480 if (!elem
) return E_OUTOFMEMORY
;
482 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
484 reader_free(reader
, elem
);
488 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
491 reader_free_strvalued(reader
, &elem
->qname
);
492 reader_free(reader
, elem
);
496 if (!list_empty(&reader
->elements
))
498 hr
= reader_inc_depth(reader
);
500 reader_free(reader
, elem
);
505 list_add_head(&reader
->elements
, &elem
->entry
);
506 reader
->empty_element
= FALSE
;
510 static void reader_pop_element(xmlreader
*reader
)
512 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
516 list_remove(&elem
->entry
);
517 reader_free_strvalued(reader
, &elem
->qname
);
518 reader_free_strvalued(reader
, &elem
->localname
);
519 reader_free(reader
, elem
);
520 reader_dec_depth(reader
);
524 /* Always make a copy, because strings are supposed to be null-terminated. A null pointer for 'value'
525 means node value is to be determined. */
526 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
528 strval
*v
= &reader
->strvalues
[type
];
530 reader_free_strvalue(reader
, type
);
539 if (value
->str
== strval_empty
.str
)
543 if (type
== StringValue_Value
)
545 /* defer allocation for value string */
547 v
->start
= value
->start
;
552 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
553 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
554 v
->str
[value
->len
] = 0;
560 static inline int is_reader_pending(xmlreader
*reader
)
562 return reader
->input
->pending
;
565 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
567 const int initial_len
= 0x2000;
568 buffer
->data
= readerinput_alloc(input
, initial_len
);
569 if (!buffer
->data
) return E_OUTOFMEMORY
;
571 memset(buffer
->data
, 0, 4);
573 buffer
->allocated
= initial_len
;
579 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
581 readerinput_free(input
, buffer
->data
);
584 static HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
586 if (encoding
== XmlEncoding_Unknown
)
588 FIXME("unsupported encoding %d\n", encoding
);
592 *cp
= xml_encoding_map
[encoding
].cp
;
597 static xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
601 if (!name
) return XmlEncoding_Unknown
;
604 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
611 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
613 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
615 return xml_encoding_map
[n
].enc
;
623 return XmlEncoding_Unknown
;
626 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
628 input_buffer
*buffer
;
631 input
->buffer
= NULL
;
633 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
634 if (!buffer
) return E_OUTOFMEMORY
;
636 buffer
->input
= input
;
637 buffer
->code_page
= ~0; /* code page is unknown at this point */
638 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
640 readerinput_free(input
, buffer
);
644 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
646 free_encoded_buffer(input
, &buffer
->utf16
);
647 readerinput_free(input
, buffer
);
651 input
->buffer
= buffer
;
655 static void free_input_buffer(input_buffer
*buffer
)
657 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
658 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
659 readerinput_free(buffer
->input
, buffer
);
662 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
664 if (readerinput
->stream
) {
665 ISequentialStream_Release(readerinput
->stream
);
666 readerinput
->stream
= NULL
;
670 /* Queries already stored interface for IStream/ISequentialStream.
671 Interface supplied on creation will be overwritten */
672 static HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
676 readerinput_release_stream(readerinput
);
677 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
679 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
684 /* reads a chunk to raw buffer */
685 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
687 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
688 /* to make sure aligned length won't exceed allocated length */
689 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
693 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
694 variable width encodings like UTF-8 */
695 len
= (len
+ 3) & ~3;
696 /* try to use allocated space or grow */
697 if (buffer
->allocated
- buffer
->written
< len
)
699 buffer
->allocated
*= 2;
700 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
701 len
= buffer
->allocated
- buffer
->written
;
705 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
706 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
707 readerinput
->pending
= hr
== E_PENDING
;
708 if (FAILED(hr
)) return hr
;
709 buffer
->written
+= read
;
714 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
715 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
717 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
719 length
*= sizeof(WCHAR
);
720 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
721 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
723 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
724 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
725 buffer
->allocated
= grown_size
;
729 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
731 static char startA
[] = {'<','?'};
732 static char commentA
[] = {'<','!'};
733 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
734 unsigned char *ptr
= (unsigned char*)buffer
->data
;
736 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
737 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
738 /* test start byte */
741 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
742 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
743 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
744 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
748 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
750 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
751 static WCHAR startW
[] = {'<','?'};
752 static WCHAR commentW
[] = {'<','!'};
753 static char utf8bom
[] = {0xef,0xbb,0xbf};
754 static char utf16lebom
[] = {0xff,0xfe};
756 *enc
= XmlEncoding_Unknown
;
758 if (buffer
->written
<= 3)
760 HRESULT hr
= readerinput_growraw(readerinput
);
761 if (FAILED(hr
)) return hr
;
762 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
765 /* try start symbols if we have enough data to do that, input buffer should contain
766 first chunk already */
767 if (readerinput_is_utf8(readerinput
))
768 *enc
= XmlEncoding_UTF8
;
769 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
770 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
771 *enc
= XmlEncoding_UTF16
;
772 /* try with BOM now */
773 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
775 buffer
->cur
+= sizeof(utf8bom
);
776 *enc
= XmlEncoding_UTF8
;
778 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
780 buffer
->cur
+= sizeof(utf16lebom
);
781 *enc
= XmlEncoding_UTF16
;
787 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
789 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
790 int len
= buffer
->written
;
792 /* complete single byte char */
793 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
795 /* find start byte of multibyte char */
796 while (--len
&& !(buffer
->data
[len
] & 0xc0))
802 /* Returns byte length of complete char sequence for buffer code page,
803 it's relative to current buffer position which is currently used for BOM handling
805 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
807 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
810 if (readerinput
->buffer
->code_page
== CP_UTF8
)
811 len
= readerinput_get_utf8_convlen(readerinput
);
813 len
= buffer
->written
;
815 TRACE("%d\n", len
- buffer
->cur
);
816 return len
- buffer
->cur
;
819 /* It's possible that raw buffer has some leftovers from last conversion - some char
820 sequence that doesn't represent a full code point. Length argument should be calculated with
821 readerinput_get_convlen(), if it's -1 it will be calculated here. */
822 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
824 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
827 len
= readerinput_get_convlen(readerinput
);
829 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
830 /* everything below cur is lost too */
831 buffer
->written
-= len
+ buffer
->cur
;
832 /* after this point we don't need cur offset really,
833 it's used only to mark where actual data begins when first chunk is read */
837 /* note that raw buffer content is kept */
838 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
840 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
841 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
847 hr
= get_code_page(enc
, &cp
);
848 if (FAILED(hr
)) return;
850 readerinput
->buffer
->code_page
= cp
;
851 len
= readerinput_get_convlen(readerinput
);
853 TRACE("switching to cp %d\n", cp
);
855 /* just copy in this case */
856 if (enc
== XmlEncoding_UTF16
)
858 readerinput_grow(readerinput
, len
);
859 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
860 dest
->written
+= len
*sizeof(WCHAR
);
864 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
865 readerinput_grow(readerinput
, dest_len
);
866 ptr
= (WCHAR
*)dest
->data
;
867 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
869 dest
->written
+= dest_len
*sizeof(WCHAR
);
872 /* shrinks parsed data a buffer begins with */
873 static void reader_shrink(xmlreader
*reader
)
875 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
877 /* avoid moving data too often by using a shrink-length threshold */
878 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
880 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
881 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
883 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
887 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
888 It won't attempt to shrink but will grow destination buffer if needed */
889 static HRESULT
reader_more(xmlreader
*reader
)
891 xmlreaderinput
*readerinput
= reader
->input
;
892 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
893 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
894 UINT cp
= readerinput
->buffer
->code_page
;
899 /* get some raw data from stream first */
900 hr
= readerinput_growraw(readerinput
);
901 len
= readerinput_get_convlen(readerinput
);
903 /* just copy for UTF-16 case */
906 readerinput_grow(readerinput
, len
);
907 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
908 dest
->written
+= len
*sizeof(WCHAR
);
912 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
913 readerinput_grow(readerinput
, dest_len
);
914 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
915 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
917 dest
->written
+= dest_len
*sizeof(WCHAR
);
918 /* get rid of processed data */
919 readerinput_shrinkraw(readerinput
, len
);
924 static inline UINT
reader_get_cur(xmlreader
*reader
)
926 return reader
->input
->buffer
->utf16
.cur
;
929 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
931 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
932 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
933 if (!*ptr
) reader_more(reader
);
934 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
937 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
939 const WCHAR
*ptr
= reader_get_ptr(reader
);
940 return strncmpW(str
, ptr
, strlenW(str
));
943 /* moves cursor n WCHARs forward */
944 static void reader_skipn(xmlreader
*reader
, int n
)
946 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
947 const WCHAR
*ptr
= reader_get_ptr(reader
);
949 while (*ptr
++ && n
--)
956 static inline BOOL
is_wchar_space(WCHAR ch
)
958 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
961 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
962 static int reader_skipspaces(xmlreader
*reader
)
964 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
965 const WCHAR
*ptr
= reader_get_ptr(reader
), *start
= ptr
;
967 while (is_wchar_space(*ptr
))
972 else if (*ptr
== '\n')
985 /* [26] VersionNum ::= '1.' [0-9]+ */
986 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
988 static const WCHAR onedotW
[] = {'1','.',0};
992 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
994 start
= reader_get_cur(reader
);
996 reader_skipn(reader
, 2);
998 ptr2
= ptr
= reader_get_ptr(reader
);
999 while (*ptr
>= '0' && *ptr
<= '9')
1001 reader_skipn(reader
, 1);
1002 ptr
= reader_get_ptr(reader
);
1005 if (ptr2
== ptr
) return WC_E_DIGIT
;
1006 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1007 TRACE("version=%s\n", debug_strval(reader
, val
));
1011 /* [25] Eq ::= S? '=' S? */
1012 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1014 static const WCHAR eqW
[] = {'=',0};
1015 reader_skipspaces(reader
);
1016 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1018 reader_skipn(reader
, 1);
1019 reader_skipspaces(reader
);
1023 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1024 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1026 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1030 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1032 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1033 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1034 /* skip 'version' */
1035 reader_skipn(reader
, 7);
1037 hr
= reader_parse_eq(reader
);
1038 if (FAILED(hr
)) return hr
;
1040 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1043 reader_skipn(reader
, 1);
1045 hr
= reader_parse_versionnum(reader
, &val
);
1046 if (FAILED(hr
)) return hr
;
1048 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1052 reader_skipn(reader
, 1);
1054 return reader_add_attr(reader
, &name
, &val
);
1057 /* ([A-Za-z0-9._] | '-') */
1058 static inline BOOL
is_wchar_encname(WCHAR ch
)
1060 return ((ch
>= 'A' && ch
<= 'Z') ||
1061 (ch
>= 'a' && ch
<= 'z') ||
1062 (ch
>= '0' && ch
<= '9') ||
1063 (ch
== '.') || (ch
== '_') ||
1067 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1068 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1070 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1074 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1075 return WC_E_ENCNAME
;
1078 while (is_wchar_encname(*++ptr
))
1082 enc
= parse_encoding_name(start
, len
);
1083 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1087 if (enc
== XmlEncoding_Unknown
)
1088 return WC_E_ENCNAME
;
1090 /* skip encoding name */
1091 reader_skipn(reader
, len
);
1095 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1096 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1098 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1102 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1104 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1105 name
.str
= reader_get_ptr(reader
);
1107 /* skip 'encoding' */
1108 reader_skipn(reader
, 8);
1110 hr
= reader_parse_eq(reader
);
1111 if (FAILED(hr
)) return hr
;
1113 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1116 reader_skipn(reader
, 1);
1118 hr
= reader_parse_encname(reader
, &val
);
1119 if (FAILED(hr
)) return hr
;
1121 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1125 reader_skipn(reader
, 1);
1127 return reader_add_attr(reader
, &name
, &val
);
1130 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1131 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1133 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1134 static const WCHAR yesW
[] = {'y','e','s',0};
1135 static const WCHAR noW
[] = {'n','o',0};
1140 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1142 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1143 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1144 /* skip 'standalone' */
1145 reader_skipn(reader
, 10);
1147 hr
= reader_parse_eq(reader
);
1148 if (FAILED(hr
)) return hr
;
1150 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1153 reader_skipn(reader
, 1);
1155 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1156 return WC_E_XMLDECL
;
1158 start
= reader_get_cur(reader
);
1159 /* skip 'yes'|'no' */
1160 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1161 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1162 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1164 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1167 reader_skipn(reader
, 1);
1169 return reader_add_attr(reader
, &name
, &val
);
1172 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1173 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1175 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1176 static const WCHAR declcloseW
[] = {'?','>',0};
1179 /* check if we have "<?xml " */
1180 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1182 reader_skipn(reader
, 5);
1183 hr
= reader_parse_versioninfo(reader
);
1187 hr
= reader_parse_encdecl(reader
);
1191 hr
= reader_parse_sddecl(reader
);
1195 reader_skipspaces(reader
);
1196 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1197 reader_skipn(reader
, 2);
1199 reader_inc_depth(reader
);
1200 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1201 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1202 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1203 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1208 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1209 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1214 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1216 start
= reader
->resume
[XmlReadResume_Body
];
1217 ptr
= reader_get_ptr(reader
);
1222 reader_skipn(reader
, 4);
1223 reader_shrink(reader
);
1224 ptr
= reader_get_ptr(reader
);
1225 start
= reader_get_cur(reader
);
1226 reader
->nodetype
= XmlNodeType_Comment
;
1227 reader
->resume
[XmlReadResume_Body
] = start
;
1228 reader
->resumestate
= XmlReadResumeState_Comment
;
1229 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1230 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1231 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1234 /* will exit when there's no more data, it won't attempt to
1235 read more from stream */
1246 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1247 TRACE("%s\n", debug_strval(reader
, &value
));
1249 /* skip rest of markup '->' */
1250 reader_skipn(reader
, 3);
1252 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1253 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1254 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1255 reader
->resume
[XmlReadResume_Body
] = 0;
1256 reader
->resumestate
= XmlReadResumeState_Initial
;
1260 return WC_E_COMMENT
;
1264 reader_skipn(reader
, 1);
1271 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1272 static inline BOOL
is_char(WCHAR ch
)
1274 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1275 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1276 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1277 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1278 (ch
>= 0xe000 && ch
<= 0xfffd);
1281 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1282 static inline BOOL
is_pubchar(WCHAR ch
)
1284 return (ch
== ' ') ||
1285 (ch
>= 'a' && ch
<= 'z') ||
1286 (ch
>= 'A' && ch
<= 'Z') ||
1287 (ch
>= '0' && ch
<= '9') ||
1288 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1289 (ch
== '=') || (ch
== '?') ||
1290 (ch
== '@') || (ch
== '!') ||
1291 (ch
>= '#' && ch
<= '%') || /* #$% */
1292 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1295 static inline BOOL
is_namestartchar(WCHAR ch
)
1297 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1298 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1299 (ch
>= 0xc0 && ch
<= 0xd6) ||
1300 (ch
>= 0xd8 && ch
<= 0xf6) ||
1301 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1302 (ch
>= 0x370 && ch
<= 0x37d) ||
1303 (ch
>= 0x37f && ch
<= 0x1fff) ||
1304 (ch
>= 0x200c && ch
<= 0x200d) ||
1305 (ch
>= 0x2070 && ch
<= 0x218f) ||
1306 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1307 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1308 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1309 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1310 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1311 (ch
>= 0xfdf0 && ch
<= 0xfffd);
/* [4 NS] NCName ::= Name - (Char* ':' Char*)
 *
 * Returns non-zero when ch is a NameChar other than ':'.  Besides the name
 * start ranges this covers '-', '.', digits, #xB7 (middle dot), the combining
 * range #x300-#x36F and #x203F-#x2040, as listed by production [4a] NameChar.
 * Works on single UTF-16 code units, so both surrogate halves are accepted. */
static inline BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0' && ch <= '9') ||
           (ch == 0xb7) ||
           (ch >= 0xc0 && ch <= 0xd6) ||
           (ch >= 0xd8 && ch <= 0xf6) ||
           (ch >= 0xf8 && ch <= 0x2ff) ||
           (ch >= 0x300 && ch <= 0x36f) ||
           (ch >= 0x370 && ch <= 0x37d) ||
           (ch >= 0x37f && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1339 static inline BOOL
is_namechar(WCHAR ch
)
1341 return (ch
== ':') || is_ncnamechar(ch
);
1344 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1346 /* When we're on attribute always return attribute type, container node type is kept.
1347 Note that container is not necessarily an element, and attribute doesn't mean it's
1348 an attribute in XML spec terms. */
1349 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1352 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1353 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1354 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1355 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1356 [5] Name ::= NameStartChar (NameChar)* */
1357 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1362 if (reader
->resume
[XmlReadResume_Name
])
1364 start
= reader
->resume
[XmlReadResume_Name
];
1365 ptr
= reader_get_ptr(reader
);
1369 ptr
= reader_get_ptr(reader
);
1370 start
= reader_get_cur(reader
);
1371 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1374 while (is_namechar(*ptr
))
1376 reader_skipn(reader
, 1);
1377 ptr
= reader_get_ptr(reader
);
1380 if (is_reader_pending(reader
))
1382 reader
->resume
[XmlReadResume_Name
] = start
;
1386 reader
->resume
[XmlReadResume_Name
] = 0;
1388 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1389 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1394 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1395 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1397 static const WCHAR xmlW
[] = {'x','m','l'};
1398 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1404 hr
= reader_parse_name(reader
, &name
);
1405 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1407 /* now that we got name check for illegal content */
1408 if (strval_eq(reader
, &name
, &xmlval
))
1409 return WC_E_LEADINGXML
;
1411 /* PITarget can't be a qualified name */
1412 ptr
= reader_get_strptr(reader
, &name
);
1413 for (i
= 0; i
< name
.len
; i
++)
1415 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1417 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1422 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1423 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1430 switch (reader
->resumestate
)
1432 case XmlReadResumeState_Initial
:
1434 reader_skipn(reader
, 2);
1435 reader_shrink(reader
);
1436 reader
->resumestate
= XmlReadResumeState_PITarget
;
1437 case XmlReadResumeState_PITarget
:
1438 hr
= reader_parse_pitarget(reader
, &target
);
1439 if (FAILED(hr
)) return hr
;
1440 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1441 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1442 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1443 reader
->resumestate
= XmlReadResumeState_PIBody
;
1444 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1449 start
= reader
->resume
[XmlReadResume_Body
];
1450 ptr
= reader_get_ptr(reader
);
1457 UINT cur
= reader_get_cur(reader
);
1460 /* strip all leading whitespace chars */
1463 ptr
= reader_get_ptr2(reader
, start
);
1464 if (!is_wchar_space(*ptr
)) break;
1468 reader_init_strvalue(start
, cur
-start
, &value
);
1471 reader_skipn(reader
, 2);
1472 TRACE("%s\n", debug_strval(reader
, &value
));
1473 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1474 reader
->resumestate
= XmlReadResumeState_Initial
;
1475 reader
->resume
[XmlReadResume_Body
] = 0;
1476 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1481 reader_skipn(reader
, 1);
1482 ptr
= reader_get_ptr(reader
);
1488 /* This one is used to parse significant whitespace nodes, like in Misc production */
1489 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1493 reader_shrink(reader
);
1494 start
= reader_get_ptr(reader
);
1496 reader_skipspaces(reader
);
1497 ptr
= reader_get_ptr(reader
);
1498 TRACE("%s\n", debugstr_wn(start
, ptr
-start
));
1500 reader
->nodetype
= XmlNodeType_Whitespace
;
1501 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1502 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1503 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1507 /* [27] Misc ::= Comment | PI | S */
1508 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1510 HRESULT hr
= S_FALSE
;
1512 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1514 hr
= reader_more(reader
);
1515 if (FAILED(hr
)) return hr
;
1517 /* finish current node */
1518 switch (reader
->resumestate
)
1520 case XmlReadResumeState_PITarget
:
1521 case XmlReadResumeState_PIBody
:
1522 return reader_parse_pi(reader
);
1523 case XmlReadResumeState_Comment
:
1524 return reader_parse_comment(reader
);
1526 ERR("unknown resume state %d\n", reader
->resumestate
);
1532 const WCHAR
*cur
= reader_get_ptr(reader
);
1534 if (is_wchar_space(*cur
))
1535 hr
= reader_parse_whitespace(reader
);
1536 else if (!reader_cmp(reader
, commentW
))
1537 hr
= reader_parse_comment(reader
);
1538 else if (!reader_cmp(reader
, piW
))
1539 hr
= reader_parse_pi(reader
);
1543 if (hr
!= S_FALSE
) return hr
;
1549 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1550 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1552 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1555 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1558 reader_skipn(reader
, 1);
1560 cur
= reader_get_ptr(reader
);
1561 start
= reader_get_cur(reader
);
1562 while (is_char(*cur
) && *cur
!= quote
)
1564 reader_skipn(reader
, 1);
1565 cur
= reader_get_ptr(reader
);
1567 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1568 if (*cur
== quote
) reader_skipn(reader
, 1);
1570 TRACE("%s\n", debug_strval(reader
, literal
));
1574 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1575 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1576 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1578 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1581 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1584 reader_skipn(reader
, 1);
1586 start
= reader_get_cur(reader
);
1587 cur
= reader_get_ptr(reader
);
1588 while (is_pubchar(*cur
) && *cur
!= quote
)
1590 reader_skipn(reader
, 1);
1591 cur
= reader_get_ptr(reader
);
1594 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1595 TRACE("%s\n", debug_strval(reader
, literal
));
1599 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1600 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1602 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1603 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1608 if (reader_cmp(reader
, systemW
))
1610 if (reader_cmp(reader
, publicW
))
1617 reader_skipn(reader
, 6);
1618 cnt
= reader_skipspaces(reader
);
1619 if (!cnt
) return WC_E_WHITESPACE
;
1621 hr
= reader_parse_pub_literal(reader
, &pub
);
1622 if (FAILED(hr
)) return hr
;
1624 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1625 return reader_add_attr(reader
, &name
, &pub
);
1633 reader_skipn(reader
, 6);
1634 cnt
= reader_skipspaces(reader
);
1635 if (!cnt
) return WC_E_WHITESPACE
;
1637 hr
= reader_parse_sys_literal(reader
, &sys
);
1638 if (FAILED(hr
)) return hr
;
1640 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1641 return reader_add_attr(reader
, &name
, &sys
);
1647 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1648 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1650 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1655 /* check if we have "<!DOCTYPE" */
1656 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1657 reader_shrink(reader
);
1659 /* DTD processing is not allowed by default */
1660 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1662 reader_skipn(reader
, 9);
1663 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1666 hr
= reader_parse_name(reader
, &name
);
1667 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1669 reader_skipspaces(reader
);
1671 hr
= reader_parse_externalid(reader
);
1672 if (FAILED(hr
)) return hr
;
1674 reader_skipspaces(reader
);
1676 cur
= reader_get_ptr(reader
);
1679 FIXME("internal subset parsing not implemented\n");
1684 reader_skipn(reader
, 1);
1686 reader
->nodetype
= XmlNodeType_DocumentType
;
1687 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1688 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1693 /* [11 NS] LocalPart ::= NCName */
1694 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1699 if (reader
->resume
[XmlReadResume_Local
])
1701 start
= reader
->resume
[XmlReadResume_Local
];
1702 ptr
= reader_get_ptr(reader
);
1706 ptr
= reader_get_ptr(reader
);
1707 start
= reader_get_cur(reader
);
1710 while (is_ncnamechar(*ptr
))
1712 reader_skipn(reader
, 1);
1713 ptr
= reader_get_ptr(reader
);
1716 if (is_reader_pending(reader
))
1718 reader
->resume
[XmlReadResume_Local
] = start
;
1722 reader
->resume
[XmlReadResume_Local
] = 0;
1724 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1729 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1730 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1731 [9 NS] UnprefixedName ::= LocalPart
1732 [10 NS] Prefix ::= NCName */
1733 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1739 if (reader
->resume
[XmlReadResume_Name
])
1741 start
= reader
->resume
[XmlReadResume_Name
];
1742 ptr
= reader_get_ptr(reader
);
1746 ptr
= reader_get_ptr(reader
);
1747 start
= reader_get_cur(reader
);
1748 reader
->resume
[XmlReadResume_Name
] = start
;
1749 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1752 if (reader
->resume
[XmlReadResume_Local
])
1754 hr
= reader_parse_local(reader
, local
);
1755 if (FAILED(hr
)) return hr
;
1757 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1758 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1763 /* skip prefix part */
1764 while (is_ncnamechar(*ptr
))
1766 reader_skipn(reader
, 1);
1767 ptr
= reader_get_ptr(reader
);
1770 if (is_reader_pending(reader
)) return E_PENDING
;
1772 /* got a qualified name */
1775 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1778 reader_skipn(reader
, 1);
1779 hr
= reader_parse_local(reader
, local
);
1780 if (FAILED(hr
)) return hr
;
1784 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1785 reader_init_strvalue(0, 0, prefix
);
1789 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1792 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1794 TRACE("ncname %s\n", debug_strval(reader
, local
));
1796 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1798 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1801 reader
->resume
[XmlReadResume_Name
] = 0;
1802 reader
->resume
[XmlReadResume_Local
] = 0;
1807 /* Applies normalization rules to a single char, used for attribute values.
1809 Rules include 2 steps:
1811 1) replacing \r\n with a single \n;
1812 2) replacing all whitespace chars with ' '.
1815 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1817 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1819 if (!is_wchar_space(*ptr
)) return;
1821 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1823 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1824 memmove(ptr
+1, ptr
+2, len
);
1829 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1831 static const WCHAR entltW
[] = {'l','t'};
1832 static const WCHAR entgtW
[] = {'g','t'};
1833 static const WCHAR entampW
[] = {'a','m','p'};
1834 static const WCHAR entaposW
[] = {'a','p','o','s'};
1835 static const WCHAR entquotW
[] = {'q','u','o','t'};
1836 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1837 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1838 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1839 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1840 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1841 WCHAR
*str
= reader_get_strptr(reader
, name
);
1846 if (strval_eq(reader
, name
, <
)) return '<';
1849 if (strval_eq(reader
, name
, >
)) return '>';
1852 if (strval_eq(reader
, name
, &
))
1854 else if (strval_eq(reader
, name
, &apos
))
1858 if (strval_eq(reader
, name
, "
)) return '\"';
1867 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1868 [67] Reference ::= EntityRef | CharRef
1869 [68] EntityRef ::= '&' Name ';' */
1870 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1872 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1873 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1874 UINT cur
= reader_get_cur(reader
);
1879 reader_skipn(reader
, 1);
1880 ptr
= reader_get_ptr(reader
);
1884 reader_skipn(reader
, 1);
1885 ptr
= reader_get_ptr(reader
);
1887 /* hex char or decimal */
1890 reader_skipn(reader
, 1);
1891 ptr
= reader_get_ptr(reader
);
1895 if ((*ptr
>= '0' && *ptr
<= '9'))
1896 ch
= ch
*16 + *ptr
- '0';
1897 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1898 ch
= ch
*16 + *ptr
- 'a' + 10;
1899 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1900 ch
= ch
*16 + *ptr
- 'A' + 10;
1902 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1903 reader_skipn(reader
, 1);
1904 ptr
= reader_get_ptr(reader
);
1911 if ((*ptr
>= '0' && *ptr
<= '9'))
1913 ch
= ch
*10 + *ptr
- '0';
1914 reader_skipn(reader
, 1);
1915 ptr
= reader_get_ptr(reader
);
1918 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1922 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1925 if (is_wchar_space(ch
)) ch
= ' ';
1927 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1928 memmove(start
+1, ptr
+1, len
);
1929 buffer
->cur
= cur
+ 1;
1938 hr
= reader_parse_name(reader
, &name
);
1939 if (FAILED(hr
)) return hr
;
1941 ptr
= reader_get_ptr(reader
);
1942 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1944 /* predefined entities resolve to a single character */
1945 ch
= get_predefined_entity(reader
, &name
);
1948 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1949 memmove(start
+1, ptr
+1, len
);
1950 buffer
->cur
= cur
+ 1;
1956 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1957 return WC_E_UNDECLAREDENTITY
;
1965 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1966 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1971 ptr
= reader_get_ptr(reader
);
1973 /* skip opening quote */
1975 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1976 reader_skipn(reader
, 1);
1978 ptr
= reader_get_ptr(reader
);
1979 start
= reader_get_cur(reader
);
1982 if (*ptr
== '<') return WC_E_LESSTHAN
;
1986 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
1987 /* skip closing quote */
1988 reader_skipn(reader
, 1);
1994 HRESULT hr
= reader_parse_reference(reader
);
1995 if (FAILED(hr
)) return hr
;
1999 reader_normalize_space(reader
, ptr
);
2000 reader_skipn(reader
, 1);
2002 ptr
= reader_get_ptr(reader
);
2008 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2009 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2010 [3 NS] DefaultAttName ::= 'xmlns'
2011 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2012 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2014 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
2015 strval prefix
, local
, qname
, xmlns
, value
;
2018 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2019 if (FAILED(hr
)) return hr
;
2021 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2023 if (strval_eq(reader
, &prefix
, &xmlns
))
2025 FIXME("namespace definitions not supported\n");
2029 if (strval_eq(reader
, &qname
, &xmlns
))
2031 FIXME("default namespace definitions not supported\n");
2035 hr
= reader_parse_eq(reader
);
2036 if (FAILED(hr
)) return hr
;
2038 hr
= reader_parse_attvalue(reader
, &value
);
2039 if (FAILED(hr
)) return hr
;
2041 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2042 return reader_add_attr(reader
, &local
, &value
);
2045 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2046 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2047 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2051 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2052 if (FAILED(hr
)) return hr
;
2056 static const WCHAR endW
[] = {'/','>',0};
2058 reader_skipspaces(reader
);
2061 if ((*empty
= !reader_cmp(reader
, endW
)))
2064 reader_skipn(reader
, 2);
2065 reader
->empty_element
= TRUE
;
2069 /* got a start tag */
2070 if (!reader_cmp(reader
, gtW
))
2073 reader_skipn(reader
, 1);
2074 return reader_push_element(reader
, qname
, local
);
2077 hr
= reader_parse_attribute(reader
);
2078 if (FAILED(hr
)) return hr
;
2084 /* [39] element ::= EmptyElemTag | STag content ETag */
2085 static HRESULT
reader_parse_element(xmlreader
*reader
)
2089 switch (reader
->resumestate
)
2091 case XmlReadResumeState_Initial
:
2092 /* check if we are really on element */
2093 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2096 reader_skipn(reader
, 1);
2098 reader_shrink(reader
);
2099 reader
->resumestate
= XmlReadResumeState_STag
;
2100 case XmlReadResumeState_STag
:
2102 strval qname
, prefix
, local
;
2105 /* this handles empty elements too */
2106 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2107 if (FAILED(hr
)) return hr
;
2109 /* FIXME: need to check for defined namespace to reject invalid prefix,
2110 currently reject all prefixes */
2111 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2113 /* if we got empty element and stack is empty go straight to Misc */
2114 if (empty
&& list_empty(&reader
->elements
))
2115 reader
->instate
= XmlReadInState_MiscEnd
;
2117 reader
->instate
= XmlReadInState_Content
;
2119 reader
->nodetype
= XmlNodeType_Element
;
2120 reader
->resumestate
= XmlReadResumeState_Initial
;
2121 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2122 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2123 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2133 /* [13 NS] ETag ::= '</' QName S? '>' */
2134 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2136 strval prefix
, local
, qname
;
2137 struct element
*elem
;
2141 reader_skipn(reader
, 2);
2143 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2144 if (FAILED(hr
)) return hr
;
2146 reader_skipspaces(reader
);
2148 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2151 reader_skipn(reader
, 1);
2153 /* Element stack should never be empty at this point, cause we shouldn't get to
2154 content parsing if it's empty. */
2155 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2156 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2158 reader_pop_element(reader
);
2160 /* It was a root element, the rest is expected as Misc */
2161 if (list_empty(&reader
->elements
))
2162 reader
->instate
= XmlReadInState_MiscEnd
;
2164 reader
->nodetype
= XmlNodeType_EndElement
;
2165 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2166 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2171 /* [18] CDSect ::= CDStart CData CDEnd
2172 [19] CDStart ::= '<![CDATA['
2173 [20] CData ::= (Char* - (Char* ']]>' Char*))
2174 [21] CDEnd ::= ']]>' */
2175 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2180 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2182 start
= reader
->resume
[XmlReadResume_Body
];
2183 ptr
= reader_get_ptr(reader
);
2187 /* skip markup '<![CDATA[' */
2188 reader_skipn(reader
, 9);
2189 reader_shrink(reader
);
2190 ptr
= reader_get_ptr(reader
);
2191 start
= reader_get_cur(reader
);
2192 reader
->nodetype
= XmlNodeType_CDATA
;
2193 reader
->resume
[XmlReadResume_Body
] = start
;
2194 reader
->resumestate
= XmlReadResumeState_CDATA
;
2195 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2196 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2197 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2202 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2206 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2209 reader_skipn(reader
, 3);
2210 TRACE("%s\n", debug_strval(reader
, &value
));
2212 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2213 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2214 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2215 reader
->resume
[XmlReadResume_Body
] = 0;
2216 reader
->resumestate
= XmlReadResumeState_Initial
;
2221 /* Value normalization is not fully implemented, rules are:
2223 - single '\r' -> '\n';
2224 - sequence '\r\n' -> '\n', in this case value length changes;
2226 if (*ptr
== '\r') *ptr
= '\n';
2227 reader_skipn(reader
, 1);
2235 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2236 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2241 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2243 start
= reader
->resume
[XmlReadResume_Body
];
2244 ptr
= reader_get_ptr(reader
);
2248 reader_shrink(reader
);
2249 ptr
= reader_get_ptr(reader
);
2250 start
= reader_get_cur(reader
);
2251 /* There's no text */
2252 if (!*ptr
|| *ptr
== '<') return S_OK
;
2253 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2254 reader
->resume
[XmlReadResume_Body
] = start
;
2255 reader
->resumestate
= XmlReadResumeState_CharData
;
2256 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2257 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2258 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2263 /* CDATA closing sequence ']]>' is not allowed */
2264 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2265 return WC_E_CDSECTEND
;
2267 /* Found next markup part */
2272 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2273 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2274 reader
->resume
[XmlReadResume_Body
] = 0;
2275 reader
->resumestate
= XmlReadResumeState_Initial
;
2279 reader_skipn(reader
, 1);
2281 /* this covers a case when text has leading whitespace chars */
2282 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2289 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2290 static HRESULT
reader_parse_content(xmlreader
*reader
)
2292 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2293 static const WCHAR etagW
[] = {'<','/',0};
2294 static const WCHAR ampW
[] = {'&',0};
2296 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2298 switch (reader
->resumestate
)
2300 case XmlReadResumeState_CDATA
:
2301 return reader_parse_cdata(reader
);
2302 case XmlReadResumeState_Comment
:
2303 return reader_parse_comment(reader
);
2304 case XmlReadResumeState_PIBody
:
2305 case XmlReadResumeState_PITarget
:
2306 return reader_parse_pi(reader
);
2307 case XmlReadResumeState_CharData
:
2308 return reader_parse_chardata(reader
);
2310 ERR("unknown resume state %d\n", reader
->resumestate
);
2314 reader_shrink(reader
);
2316 /* handle end tag here, it indicates end of content as well */
2317 if (!reader_cmp(reader
, etagW
))
2318 return reader_parse_endtag(reader
);
2320 if (!reader_cmp(reader
, commentW
))
2321 return reader_parse_comment(reader
);
2323 if (!reader_cmp(reader
, piW
))
2324 return reader_parse_pi(reader
);
2326 if (!reader_cmp(reader
, cdstartW
))
2327 return reader_parse_cdata(reader
);
2329 if (!reader_cmp(reader
, ampW
))
2330 return reader_parse_reference(reader
);
2332 if (!reader_cmp(reader
, ltW
))
2333 return reader_parse_element(reader
);
2335 /* what's left must be CharData */
2336 return reader_parse_chardata(reader
);
2339 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2343 if (!is_reader_pending(reader
))
2344 reader_clear_attrs(reader
);
2348 switch (reader
->instate
)
2350 /* if it's a first call for a new input we need to detect stream encoding */
2351 case XmlReadInState_Initial
:
2355 hr
= readerinput_growraw(reader
->input
);
2356 if (FAILED(hr
)) return hr
;
2358 /* try to detect encoding by BOM or data and set input code page */
2359 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2360 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2361 if (FAILED(hr
)) return hr
;
2363 /* always switch first time cause we have to put something in */
2364 readerinput_switchencoding(reader
->input
, enc
);
2366 /* parse xml declaration */
2367 hr
= reader_parse_xmldecl(reader
);
2368 if (FAILED(hr
)) return hr
;
2370 readerinput_shrinkraw(reader
->input
, -1);
2371 reader
->instate
= XmlReadInState_Misc_DTD
;
2372 if (hr
== S_OK
) return hr
;
2375 case XmlReadInState_Misc_DTD
:
2376 hr
= reader_parse_misc(reader
);
2377 if (FAILED(hr
)) return hr
;
2380 reader
->instate
= XmlReadInState_DTD
;
2384 case XmlReadInState_DTD
:
2385 hr
= reader_parse_dtd(reader
);
2386 if (FAILED(hr
)) return hr
;
2390 reader
->instate
= XmlReadInState_DTD_Misc
;
2394 reader
->instate
= XmlReadInState_Element
;
2396 case XmlReadInState_DTD_Misc
:
2397 hr
= reader_parse_misc(reader
);
2398 if (FAILED(hr
)) return hr
;
2401 reader
->instate
= XmlReadInState_Element
;
2405 case XmlReadInState_Element
:
2406 return reader_parse_element(reader
);
2407 case XmlReadInState_Content
:
2408 return reader_parse_content(reader
);
2409 case XmlReadInState_MiscEnd
:
2410 hr
= reader_parse_misc(reader
);
2411 if (FAILED(hr
)) return hr
;
2414 reader
->instate
= XmlReadInState_Eof
;
2416 case XmlReadInState_Eof
:
2419 FIXME("internal state %d not handled\n", reader
->instate
);
2427 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2429 xmlreader
*This
= impl_from_IXmlReader(iface
);
2431 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2433 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2434 IsEqualGUID(riid
, &IID_IXmlReader
))
2440 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2442 return E_NOINTERFACE
;
2445 IXmlReader_AddRef(iface
);
2450 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2452 xmlreader
*This
= impl_from_IXmlReader(iface
);
2453 ULONG ref
= InterlockedIncrement(&This
->ref
);
2454 TRACE("(%p)->(%d)\n", This
, ref
);
2458 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2460 xmlreader
*This
= impl_from_IXmlReader(iface
);
2461 LONG ref
= InterlockedDecrement(&This
->ref
);
2463 TRACE("(%p)->(%d)\n", This
, ref
);
2467 IMalloc
*imalloc
= This
->imalloc
;
2468 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2469 reader_clear_attrs(This
);
2470 reader_clear_elements(This
);
2471 reader_free_strvalues(This
);
2472 reader_free(This
, This
);
2473 if (imalloc
) IMalloc_Release(imalloc
);
/* IXmlReader::SetInput.
   Visible steps, in order: the stream of the current input is released and
   the input itself is released; line/position are zeroed, the element stack
   is cleared and the resume state and saved resume offsets are reset.
   The supplied object is then queried for IID_IXmlReaderInput; an
   implementation whose vtable is xmlreaderinputvtbl is used directly, a
   foreign one is reported with ERR and released.  Otherwise a reader input is
   created around the supplied interface with
   CreateXmlReaderInputWithEncodingName().  Finally the input is queried for
   a stream and read state / input state are set to their initial values.
   NOTE(review): the conditions guarding these steps (presumably a NULL check
   on the old input, on 'input' itself and on the QueryInterface result) are
   not visible in this view - confirm against the complete source. */
2479 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2481 xmlreader
*This
= impl_from_IXmlReader(iface
);
2482 IXmlReaderInput
*readerinput
;
2485 TRACE("(%p)->(%p)\n", This
, input
);
2489 readerinput_release_stream(This
->input
);
2490 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2494 This
->line
= This
->pos
= 0;
2495 reader_clear_elements(This
);
2497 This
->resumestate
= XmlReadResumeState_Initial
;
2498 memset(This
->resume
, 0, sizeof(This
->resume
));
2500 /* just reset current input */
2503 This
->state
= XmlReadState_Initial
;
2507 /* now try IXmlReaderInput, ISequentialStream, IStream */
2508 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2511 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2512 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2515 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2516 readerinput
, readerinput
->lpVtbl
);
2517 IUnknown_Release(readerinput
);
2523 if (hr
!= S_OK
|| !readerinput
)
2525 /* create IXmlReaderInput basing on supplied interface */
2526 hr
= CreateXmlReaderInputWithEncodingName(input
,
2527 NULL
, NULL
, FALSE
, NULL
, &readerinput
);
2528 if (hr
!= S_OK
) return hr
;
2529 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2532 /* set stream for supplied IXmlReaderInput */
2533 hr
= readerinput_query_for_stream(This
->input
);
2536 This
->state
= XmlReadState_Initial
;
2537 This
->instate
= XmlReadInState_Initial
;
2543 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2545 xmlreader
*This
= impl_from_IXmlReader(iface
);
2547 TRACE("(%p)->(%s %p)\n", This
, debugstr_prop(property
), value
);
2549 if (!value
) return E_INVALIDARG
;
2553 case XmlReaderProperty_DtdProcessing
:
2554 *value
= This
->dtdmode
;
2556 case XmlReaderProperty_ReadState
:
2557 *value
= This
->state
;
2560 FIXME("Unimplemented property (%u)\n", property
);
2567 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2569 xmlreader
*This
= impl_from_IXmlReader(iface
);
2571 TRACE("(%p)->(%s %lu)\n", This
, debugstr_prop(property
), value
);
2575 case XmlReaderProperty_DtdProcessing
:
2576 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2577 This
->dtdmode
= value
;
2580 FIXME("Unimplemented property (%u)\n", property
);
2587 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2589 xmlreader
*This
= impl_from_IXmlReader(iface
);
2590 XmlNodeType oldtype
= This
->nodetype
;
2593 TRACE("(%p)->(%p)\n", This
, nodetype
);
2595 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2597 hr
= reader_parse_nextnode(This
);
2598 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2599 This
->state
= XmlReadState_Interactive
;
2602 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2603 *nodetype
= This
->nodetype
;
2609 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2611 xmlreader
*This
= impl_from_IXmlReader(iface
);
2612 TRACE("(%p)->(%p)\n", This
, node_type
);
2614 *node_type
= reader_get_nodetype(This
);
2615 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
/* IXmlReader::MoveToFirstAttribute: makes the head of the attribute list
 * the current attribute and publishes its local name and value as the
 * reader's current string values. Returns S_FALSE when there are no
 * attributes. */
2618 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2620 xmlreader
*This
= impl_from_IXmlReader(iface
);
2622 TRACE("(%p)\n", This
);
2624 if (!This
->attr_count
) return S_FALSE
;
/* attr_count is non-zero here, so the list head is taken as the current
 * attribute entry. */
2625 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2626 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2627 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
/* IXmlReader::MoveToNextAttribute: steps from the current attribute to the
 * one following it in the attribute list and publishes its local name and
 * value. Returns S_FALSE when there are no attributes or no next entry,
 * S_OK otherwise. */
2632 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2634 xmlreader
*This
= impl_from_IXmlReader(iface
);
2635 const struct list
*next
;
2637 TRACE("(%p)\n", This
);
2639 if (!This
->attr_count
) return S_FALSE
;
/* NOTE(review): the condition guarding this delegation is not visible in
 * this excerpt; presumably it is taken when no attribute is current yet. */
2642 return IXmlReader_MoveToFirstAttribute(iface
);
2644 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
/* NOTE(review): the result below is derived from 'next' being non-NULL, so
 * this update is presumably guarded by a check on 'next' that is not
 * visible here. */
2647 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2648 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2649 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2652 return next
? S_OK
: S_FALSE
;
/* IXmlReader::MoveToAttributeByName: not implemented; the visible body only
 * logs a FIXME with its arguments.
 * NOTE(review): part of the parameter list and the return statement are
 * not visible in this excerpt. */
2655 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2657 LPCWSTR namespaceUri
)
2659 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
/* IXmlReader::MoveToElement: moves back from an attribute to the element
 * that owns it, restoring the element's qualified and local names as the
 * reader's current string values. Returns S_FALSE when the current node
 * has no attributes. */
2663 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2665 xmlreader
*This
= impl_from_IXmlReader(iface
);
2666 struct element
*elem
;
2668 TRACE("(%p)\n", This
);
2670 if (!This
->attr_count
) return S_FALSE
;
2673 /* FIXME: support other node types with 'attributes' like DTD */
/* The owning element is taken from the head of the reader's element list. */
2674 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2677 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2678 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2684 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2686 xmlreader
*This
= impl_from_IXmlReader(iface
);
2688 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2689 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2690 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
/* IXmlReader::GetNamespaceUri: not implemented; the visible body only logs
 * a FIXME with its arguments and never writes the output parameters.
 * NOTE(review): the return statement is not visible in this excerpt. */
2694 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2695 LPCWSTR
*namespaceUri
,
2696 UINT
*namespaceUri_length
)
2698 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
/* IXmlReader::GetLocalName: returns the reader's current local name string
 * in *name and, when len is non-NULL, its length in *len. name itself is
 * written unconditionally and must not be NULL. */
2702 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2704 xmlreader
*This
= impl_from_IXmlReader(iface
);
2706 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2707 *name
= This
->strvalues
[StringValue_LocalName
].str
;
/* The length output is optional. */
2708 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
/* IXmlReader::GetPrefix: returns the reader's current prefix string in
 * *prefix and, when len is non-NULL, its length in *len. prefix itself is
 * written unconditionally and must not be NULL. */
2712 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2714 xmlreader
*This
= impl_from_IXmlReader(iface
);
2716 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2717 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
/* The length output is optional. */
2718 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
/* IXmlReader::GetValue: returns the value of the current node and, when len
 * is non-NULL, its length in characters.
 * Returns the failing HRESULT of the nested Read(), E_PENDING while input
 * is still pending, or E_OUTOFMEMORY when the value copy cannot be
 * allocated.
 * NOTE(review): several lines (local declarations, the final assignment to
 * *value and the return) are not visible in this excerpt. */
2722 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2724 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2725 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2727 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
/* A comment node whose value string is not set yet, or a reader with
 * pending input, needs another Read() before a value can be reported. */
2731 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2736 hr
= IXmlReader_Read(iface
, &type
);
2737 if (FAILED(hr
)) return hr
;
2739 /* return if still pending, partially read values are not reported */
2740 if (is_reader_pending(reader
)) return E_PENDING
;
/* Copies val->len characters into reader-allocated storage that has room
 * for one extra WCHAR.
 * NOTE(review): the guarding condition is not visible here; presumably
 * this runs only while val->str is still unset. */
2745 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2746 if (!ptr
) return E_OUTOFMEMORY
;
2747 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
/* The length output is optional. */
2753 if (len
) *len
= val
->len
;
2757 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2759 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2760 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2763 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2765 /* Value is already allocated, chunked reads are not possible. */
2766 if (val
->str
) return S_FALSE
;
2770 len
= min(chunk_size
, val
->len
);
2771 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2774 if (read
) *read
= len
;
/* IXmlReader::GetBaseUri: not implemented; the visible body only logs a
 * FIXME with its arguments.
 * NOTE(review): part of the parameter list and the return statement are
 * not visible in this excerpt. */
2780 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2782 UINT
*baseUri_length
)
2784 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
/* IXmlReader::IsDefault: not implemented; the visible body only logs a
 * FIXME. NOTE(review): the returned value is not visible in this excerpt. */
2788 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2790 FIXME("(%p): stub\n", iface
);
/* IXmlReader::IsEmptyElement: returns the reader's empty_element flag when
 * the current node type is XmlNodeType_Element, and FALSE for every other
 * node type. */
2794 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2796 xmlreader
*This
= impl_from_IXmlReader(iface
);
2797 TRACE("(%p)\n", This
);
2798 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2799 when current node is start tag of an element */
2800 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
/* IXmlReader::GetLineNumber: stores the reader's current line counter in
 * *lineNumber. Returns E_INVALIDARG when lineNumber is NULL. */
2803 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2805 xmlreader
*This
= impl_from_IXmlReader(iface
);
2807 TRACE("(%p %p)\n", This
, lineNumber
);
2809 if (!lineNumber
) return E_INVALIDARG
;
2811 *lineNumber
= This
->line
;
/* IXmlReader::GetLinePosition: stores the reader's current position counter
 * in *linePosition. Returns E_INVALIDARG when linePosition is NULL. */
2816 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2818 xmlreader
*This
= impl_from_IXmlReader(iface
);
2820 TRACE("(%p %p)\n", This
, linePosition
);
2822 if (!linePosition
) return E_INVALIDARG
;
2824 *linePosition
= This
->pos
;
/* IXmlReader::GetAttributeCount: stores the reader's attr_count in *count.
 * Returns E_INVALIDARG when count is NULL. */
2829 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2831 xmlreader
*This
= impl_from_IXmlReader(iface
);
2833 TRACE("(%p)->(%p)\n", This
, count
);
2835 if (!count
) return E_INVALIDARG
;
2837 *count
= This
->attr_count
;
/* IXmlReader::GetDepth: stores the reader's depth counter in *depth.
 * Unlike GetLineNumber()/GetAttributeCount() there is no visible NULL check,
 * so depth must not be NULL. */
2841 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2843 xmlreader
*This
= impl_from_IXmlReader(iface
);
2844 TRACE("(%p)->(%p)\n", This
, depth
);
2845 *depth
= This
->depth
;
/* IXmlReader::IsEOF: not implemented; the visible body only logs a FIXME.
 * NOTE(review): the returned value is not visible in this excerpt. */
2849 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2851 FIXME("(%p): stub\n", iface
);
/* Method table installed into every reader's IXmlReader_iface by
 * CreateXmlReader().
 * NOTE(review): several slots are not visible in this excerpt (the embedded
 * numbering skips entries), so the list below is incomplete. */
2855 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2857 xmlreader_QueryInterface
,
2861 xmlreader_GetProperty
,
2862 xmlreader_SetProperty
,
2864 xmlreader_GetNodeType
,
2865 xmlreader_MoveToFirstAttribute
,
2866 xmlreader_MoveToNextAttribute
,
2867 xmlreader_MoveToAttributeByName
,
2868 xmlreader_MoveToElement
,
2869 xmlreader_GetQualifiedName
,
2870 xmlreader_GetNamespaceUri
,
2871 xmlreader_GetLocalName
,
2872 xmlreader_GetPrefix
,
2874 xmlreader_ReadValueChunk
,
2875 xmlreader_GetBaseUri
,
2876 xmlreader_IsDefault
,
2877 xmlreader_IsEmptyElement
,
2878 xmlreader_GetLineNumber
,
2879 xmlreader_GetLinePosition
,
2880 xmlreader_GetAttributeCount
,
2885 /** IXmlReaderInput **/
/* IUnknown::QueryInterface for the reader input object. Only
 * IID_IXmlReaderInput and IID_IUnknown are recognised; any other IID is
 * logged with WARN and answered with E_NOINTERFACE. A reference is added
 * on the success path.
 * NOTE(review): the assignments to *ppvObject are not visible here. */
2886 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2888 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2890 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2892 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2893 IsEqualGUID(riid
, &IID_IUnknown
))
2899 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2901 return E_NOINTERFACE
;
2904 IUnknown_AddRef(iface
);
/* IUnknown::AddRef for the reader input object: atomically increments the
 * reference count with InterlockedIncrement() and traces the new value. */
2909 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2911 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2912 ULONG ref
= InterlockedIncrement(&This
->ref
);
2913 TRACE("(%p)->(%d)\n", This
, ref
);
/* IUnknown::Release for the reader input object: atomically decrements the
 * reference count and tears the object down.
 * NOTE(review): the condition guarding the teardown is not visible in this
 * excerpt; presumably it runs when the count drops to zero. */
2917 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2919 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2920 LONG ref
= InterlockedDecrement(&This
->ref
);
2922 TRACE("(%p)->(%d)\n", This
, ref
);
/* The allocator pointer is copied to a local first: 'This' itself is freed
 * through readerinput_free() before the allocator is released below. */
2926 IMalloc
*imalloc
= This
->imalloc
;
2927 if (This
->input
) IUnknown_Release(This
->input
);
2928 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2929 if (This
->buffer
) free_input_buffer(This
->buffer
);
2930 readerinput_free(This
, This
->baseuri
);
2931 readerinput_free(This
, This
);
2932 if (imalloc
) IMalloc_Release(imalloc
);
/* IUnknown method table for reader input objects; installed by
 * CreateXmlReaderInputWithEncodingName(). */
2938 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2940 xmlreaderinput_QueryInterface
,
2941 xmlreaderinput_AddRef
,
2942 xmlreaderinput_Release
/* CreateXmlReader: allocates and initialises a reader object and returns
 * its IXmlReader interface in *obj.
 *
 * riid    - must be IID_IXmlReader; any other IID is logged with ERR.
 * obj     - receives the new interface pointer.
 * imalloc - optional allocator; when non-NULL it is AddRef'ed and stored in
 *           the reader.
 *
 * Returns E_OUTOFMEMORY when the reader cannot be allocated.
 * NOTE(review): local declarations, the return for an unexpected IID and
 * the final return are not visible in this excerpt. */
2945 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2950 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2952 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2954 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
/* Memory comes either from the caller's allocator or from heap_alloc().
 * NOTE(review): the selecting condition is not visible here; presumably it
 * depends on whether imalloc is non-NULL. */
2959 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2961 reader
= heap_alloc(sizeof(*reader
));
2962 if(!reader
) return E_OUTOFMEMORY
;
2964 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
/* A fresh reader has no input, is in the closed state, prohibits DTD
 * processing and starts with zeroed line/position counters. */
2966 reader
->input
= NULL
;
2967 reader
->state
= XmlReadState_Closed
;
2968 reader
->instate
= XmlReadInState_Initial
;
2969 reader
->resumestate
= XmlReadResumeState_Initial
;
2970 reader
->dtdmode
= DtdProcessing_Prohibit
;
2971 reader
->line
= reader
->pos
= 0;
2972 reader
->imalloc
= imalloc
;
2973 if (imalloc
) IMalloc_AddRef(imalloc
);
2974 reader
->nodetype
= XmlNodeType_None
;
2975 list_init(&reader
->attrs
);
2976 reader
->attr_count
= 0;
2977 reader
->attr
= NULL
;
2978 list_init(&reader
->elements
);
2980 reader
->max_depth
= 256;
2981 reader
->empty_element
= FALSE
;
2982 memset(reader
->resume
, 0, sizeof(reader
->resume
));
/* Every string value slot starts out as the shared empty value. */
2984 for (i
= 0; i
< StringValue_Last
; i
++)
2985 reader
->strvalues
[i
] = strval_empty
;
2987 *obj
= &reader
->IXmlReader_iface
;
2989 TRACE("returning iface %p\n", *obj
);
/* CreateXmlReaderInputWithEncodingName: wraps a caller-supplied stream
 * object in a reader input object and returns it in *ppInput.
 *
 * Returns E_INVALIDARG when stream or ppInput is NULL and E_OUTOFMEMORY
 * when the input object cannot be allocated.
 * NOTE(review): most of the parameter list, the local declarations and the
 * handling of alloc_input_buffer()'s result are not visible in this
 * excerpt. */
2994 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
2999 IXmlReaderInput
**ppInput
)
3001 xmlreaderinput
*readerinput
;
3004 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3005 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3007 if (!stream
|| !ppInput
) return E_INVALIDARG
;
/* Memory comes either from the caller's allocator or from heap_alloc().
 * NOTE(review): the selecting condition is not visible here; presumably it
 * depends on whether imalloc is non-NULL. */
3010 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3012 readerinput
= heap_alloc(sizeof(*readerinput
));
3013 if(!readerinput
) return E_OUTOFMEMORY
;
3015 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
/* The new object starts with a single reference owned by the caller. */
3016 readerinput
->ref
= 1;
3017 readerinput
->imalloc
= imalloc
;
3018 readerinput
->stream
= NULL
;
3019 if (imalloc
) IMalloc_AddRef(imalloc
);
3020 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3021 readerinput
->hint
= hint
;
3022 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3023 readerinput
->pending
= 0;
3025 hr
= alloc_input_buffer(readerinput
);
/* Cleanup sequence: frees the base URI copy and the object itself, then
 * drops the allocator reference taken above.
 * NOTE(review): presumably the failure path of alloc_input_buffer(); the
 * guarding condition is not visible here. */
3028 readerinput_free(readerinput
, readerinput
->baseuri
);
3029 readerinput_free(readerinput
, readerinput
);
3030 if (imalloc
) IMalloc_Release(imalloc
);
/* Keeps the caller's object as an IUnknown reference in ->input; the
 * QueryInterface result is not checked. */
3033 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3035 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3037 TRACE("returning iface %p\n", *ppInput
);