2 Copyright (C) 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version.
38 Partly derived from code which carried the following notice:
40 Copyright (c) 1997, 1998 by Microstar Software Ltd.
42 AElfred is free for both commercial and non-commercial use and
43 redistribution, provided that Microstar's copyright and disclaimer are
44 retained intact. You are free to modify AElfred for your own use and
45 to redistribute AElfred with your modifications, provided that the
46 modifications are clearly documented.
48 This program is distributed in the hope that it will be useful, but
49 WITHOUT ANY WARRANTY; without even the implied warranty of
50 merchantability or fitness for a particular purpose. Please use it AT
54 package gnu
.xml
.stream
;
56 import gnu
.java
.lang
.CPStringBuilder
;
58 import java
.io
.BufferedInputStream
;
59 import java
.io
.EOFException
;
61 import java
.io
.FileOutputStream
;
62 import java
.io
.FileWriter
;
63 import java
.io
.InputStream
;
64 import java
.io
.InputStreamReader
;
65 import java
.io
.IOException
;
66 import java
.io
.Reader
;
67 import java
.io
.StringReader
;
68 import java
.io
.UnsupportedEncodingException
;
69 import java
.net
.MalformedURLException
;
71 import java
.util
.ArrayList
;
72 import java
.util
.Collections
;
73 import java
.util
.HashSet
;
74 import java
.util
.Iterator
;
75 import java
.util
.LinkedHashMap
;
76 import java
.util
.LinkedList
;
78 import java
.util
.NoSuchElementException
;
79 import java
.util
.StringTokenizer
;
81 import javax
.xml
.XMLConstants
;
82 import javax
.xml
.namespace
.NamespaceContext
;
83 import javax
.xml
.namespace
.QName
;
84 import javax
.xml
.stream
.Location
;
85 import javax
.xml
.stream
.XMLInputFactory
;
86 import javax
.xml
.stream
.XMLReporter
;
87 import javax
.xml
.stream
.XMLResolver
;
88 import javax
.xml
.stream
.XMLStreamConstants
;
89 import javax
.xml
.stream
.XMLStreamException
;
90 import javax
.xml
.stream
.XMLStreamReader
;
92 import gnu
.java
.net
.CRLFInputStream
;
93 import gnu
.classpath
.debug
.TeeInputStream
;
94 import gnu
.classpath
.debug
.TeeReader
;
98 * This parser supports the following additional StAX properties:
100 * <tr><td>gnu.xml.stream.stringInterning</td>
102 * <td>Indicates whether markup strings will be interned</td></tr>
103 * <tr><td>gnu.xml.stream.xmlBase</td>
105 * <td>Indicates whether XML Base processing will be performed</td></tr>
106 * <tr><td>gnu.xml.stream.baseURI</td>
108 * <td>Returns the base URI of the current event</td></tr>
111 * @see http://www.w3.org/TR/REC-xml/
112 * @see http://www.w3.org/TR/xml11/
113 * @see http://www.w3.org/TR/REC-xml-names
114 * @see http://www.w3.org/TR/xml-names11
115 * @see http://www.w3.org/TR/xmlbase/
117 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
119 public class XMLParser
120 implements XMLStreamReader
, NamespaceContext
123 // -- parser state machine states --
124 private static final int INIT
= 0; // start state
125 private static final int PROLOG
= 1; // in prolog
126 private static final int CONTENT
= 2; // in content
127 private static final int EMPTY_ELEMENT
= 3; // empty element state
128 private static final int MISC
= 4; // in Misc (after root element)
130 // -- parameters for parsing literals --
131 private final static int LIT_ENTITY_REF
= 2;
132 private final static int LIT_NORMALIZE
= 4;
133 private final static int LIT_ATTRIBUTE
= 8;
134 private final static int LIT_DISABLE_PE
= 16;
135 private final static int LIT_DISABLE_CREF
= 32;
136 private final static int LIT_DISABLE_EREF
= 64;
137 private final static int LIT_PUBID
= 256;
139 // -- types of attribute values --
140 final static int ATTRIBUTE_DEFAULT_UNDECLARED
= 30;
141 final static int ATTRIBUTE_DEFAULT_SPECIFIED
= 31;
142 final static int ATTRIBUTE_DEFAULT_IMPLIED
= 32;
143 final static int ATTRIBUTE_DEFAULT_REQUIRED
= 33;
144 final static int ATTRIBUTE_DEFAULT_FIXED
= 34;
146 // -- additional event types --
147 final static int START_ENTITY
= 50;
148 final static int END_ENTITY
= 51;
156 * Stack of inputs representing XML general entities.
157 * The input representing the XML input stream or reader is always the
158 * first element in this stack.
160 private LinkedList inputStack
= new LinkedList();
163 * Stack of start-entity events to be reported.
165 private LinkedList startEntityStack
= new LinkedList();
168 * Stack of end-entity events to be reported.
170 private LinkedList endEntityStack
= new LinkedList();
173 * Current parser state within the main state machine.
175 private int state
= INIT
;
178 * The (type of the) current event.
183 * The element name stack. The first element in this stack will be the
186 private LinkedList stack
= new LinkedList();
189 * Stack of namespace contexts. These are maps specifying prefix-to-URI
190 * mappings. The first element in this stack is the most recent namespace
191 * context (i.e. the other way around from the element name stack).
193 private LinkedList namespaces
= new LinkedList();
196 * The base-URI stack. This holds the base URI context for each element.
197 * The first element in this stack is the most recent context (i.e. the
198 * other way around from the element name stack).
200 private LinkedList bases
= new LinkedList();
203 * The list of attributes for the current element, in the order defined in
206 private ArrayList attrs
= new ArrayList();
209 * Buffer for text and character data.
211 private StringBuffer buf
= new StringBuffer();
214 * Buffer for NMTOKEN strings (markup).
216 private StringBuffer nmtokenBuf
= new StringBuffer();
219 * Buffer for string literals. (e.g. attribute values)
221 private StringBuffer literalBuf
= new StringBuffer();
224 * Temporary Unicode character buffer used during character data reads.
226 private int[] tmpBuf
= new int[1024];
229 * The element content model for the current element.
231 private ContentModel currentContentModel
;
234 * The validation stack. This holds lists of the elements seen for each
235 * element, in order to determine whether the names and order of these
236 * elements match the content model for the element. The last entry in
237 * this stack represents the current element.
239 private LinkedList validationStack
;
242 * These sets contain the IDs and the IDREFs seen in the document, to
243 * ensure that IDs are unique and that each IDREF refers to an ID in the
246 private HashSet ids
, idrefs
;
249 * The target and data associated with the current processing instruction
252 private String piTarget
, piData
;
255 * The XML version declared in the XML declaration.
257 private String xmlVersion
;
260 * The encoding declared in the XML declaration.
262 private String xmlEncoding
;
265 * The standalone value declared in the XML declaration.
267 private Boolean xmlStandalone
;
270 * The document type definition.
275 * State variables for determining parameter-entity expansion.
277 private boolean expandPE
, peIsError
;
280 * Whether this is a validating parser.
282 private final boolean validating
;
285 * Whether strings representing markup will be interned.
287 private final boolean stringInterning
;
290 * If true, CDATA sections will be merged with adjacent text nodes into a
293 private final boolean coalescing
;
296 * Whether to replace general entity references with their replacement
297 * text automatically during parsing.
298 * Otherwise entity-reference events will be issued.
300 private final boolean replaceERefs
;
303 * Whether to support external entities.
305 private final boolean externalEntities
;
308 * Whether to support DTDs.
310 private final boolean supportDTD
;
313 * Whether to support XML namespaces. If true, namespace information will
314 * be available. Otherwise namespaces will simply be reported as ordinary
317 private final boolean namespaceAware
;
320 * Whether to support XML Base. If true, URIs specified in xml:base
321 * attributes will be honoured when resolving external entities.
323 private final boolean baseAware
;
326 * Whether to report extended event types (START_ENTITY and END_ENTITY)
327 * in addition to the standard event types. Used by the SAX parser.
329 private final boolean extendedEventTypes
;
332 * The reporter to receive parsing warnings.
334 final XMLReporter reporter
;
337 * Callback interface for resolving external entities.
339 final XMLResolver resolver
;
341 // -- Constants for testing the next kind of markup event --
342 private static final String TEST_START_ELEMENT
= "<";
343 private static final String TEST_END_ELEMENT
= "</";
344 private static final String TEST_COMMENT
= "<!--";
345 private static final String TEST_PI
= "<?";
346 private static final String TEST_CDATA
= "<![CDATA[";
347 private static final String TEST_XML_DECL
= "<?xml";
348 private static final String TEST_DOCTYPE_DECL
= "<!DOCTYPE";
349 private static final String TEST_ELEMENT_DECL
= "<!ELEMENT";
350 private static final String TEST_ATTLIST_DECL
= "<!ATTLIST";
351 private static final String TEST_ENTITY_DECL
= "<!ENTITY";
352 private static final String TEST_NOTATION_DECL
= "<!NOTATION";
353 private static final String TEST_KET
= ">";
354 private static final String TEST_END_COMMENT
= "--";
355 private static final String TEST_END_PI
= "?>";
356 private static final String TEST_END_CDATA
= "]]>";
359 * The general entities predefined by the XML specification.
361 private static final LinkedHashMap PREDEFINED_ENTITIES
= new LinkedHashMap();
364 PREDEFINED_ENTITIES
.put("amp", "&");
365 PREDEFINED_ENTITIES
.put("lt", "<");
366 PREDEFINED_ENTITIES
.put("gt", ">");
367 PREDEFINED_ENTITIES
.put("apos", "'");
368 PREDEFINED_ENTITIES
.put("quot", "\"");
372 * Creates a new XML parser for the given input stream.
373 * This constructor should be used where possible, as it allows the
374 * encoding of the XML data to be correctly determined from the stream.
375 * @param in the input stream
376 * @param systemId the URL from which the input stream was retrieved
377 * (necessary if there are external entities to be resolved)
378 * @param validating if the parser is to be a validating parser
379 * @param namespaceAware if the parser should support XML Namespaces
380 * @param coalescing if CDATA sections should be merged into adjacent text
382 * @param replaceERefs if entity references should be automatically
383 * replaced by their replacement text (otherwise they will be reported as
384 * entity-reference events)
385 * @param externalEntities if external entities should be loaded
386 * @param supportDTD if support for the XML DTD should be enabled
387 * @param baseAware if the parser should support XML Base to resolve
389 * @param stringInterning whether strings will be interned during parsing
390 * @param reporter the reporter to receive warnings during processing
391 * @param resolver the callback interface used to resolve external
394 public XMLParser(InputStream in
, String systemId
,
396 boolean namespaceAware
,
398 boolean replaceERefs
,
399 boolean externalEntities
,
402 boolean stringInterning
,
403 boolean extendedEventTypes
,
404 XMLReporter reporter
,
405 XMLResolver resolver
)
407 this.validating
= validating
;
408 this.namespaceAware
= namespaceAware
;
409 this.coalescing
= coalescing
;
410 this.replaceERefs
= replaceERefs
;
411 this.externalEntities
= externalEntities
;
412 this.supportDTD
= supportDTD
;
413 this.baseAware
= baseAware
;
414 this.stringInterning
= stringInterning
;
415 this.extendedEventTypes
= extendedEventTypes
;
416 this.reporter
= reporter
;
417 this.resolver
= resolver
;
420 validationStack
= new LinkedList();
422 idrefs
= new HashSet();
424 String debug
= System
.getProperty("gnu.xml.debug.input");
429 File file
= File
.createTempFile(debug
, ".xml");
430 in
= new TeeInputStream(in
, new FileOutputStream(file
));
432 catch (IOException e
)
434 RuntimeException e2
= new RuntimeException();
439 systemId
= canonicalize(systemId
);
440 pushInput(new Input(in
, null, null, systemId
, null, null, false, true));
444 * Creates a new XML parser for the given character stream.
445 * This constructor is only available for compatibility with the JAXP
446 * APIs, which permit XML to be parsed from a character stream. Because
447 * the encoding specified by the character stream may conflict with that
448 * specified in the XML declaration, this method should be avoided where
450 * @param in the input stream
451 * @param systemId the URL from which the input stream was retrieved
452 * (necessary if there are external entities to be resolved)
453 * @param validating if the parser is to be a validating parser
454 * @param namespaceAware if the parser should support XML Namespaces
455 * @param coalescing if CDATA sections should be merged into adjacent text
457 * @param replaceERefs if entity references should be automatically
458 * replaced by their replacement text (otherwise they will be reported as
459 * entity-reference events)
460 * @param externalEntities if external entities should be loaded
461 * @param supportDTD if support for the XML DTD should be enabled
462 * @param baseAware if the parser should support XML Base to resolve
464 * @param stringInterning whether strings will be interned during parsing
465 * @param reporter the reporter to receive warnings during processing
466 * @param resolver the callback interface used to resolve external
469 public XMLParser(Reader reader
, String systemId
,
471 boolean namespaceAware
,
473 boolean replaceERefs
,
474 boolean externalEntities
,
477 boolean stringInterning
,
478 boolean extendedEventTypes
,
479 XMLReporter reporter
,
480 XMLResolver resolver
)
482 this.validating
= validating
;
483 this.namespaceAware
= namespaceAware
;
484 this.coalescing
= coalescing
;
485 this.replaceERefs
= replaceERefs
;
486 this.externalEntities
= externalEntities
;
487 this.supportDTD
= supportDTD
;
488 this.baseAware
= baseAware
;
489 this.stringInterning
= stringInterning
;
490 this.extendedEventTypes
= extendedEventTypes
;
491 this.reporter
= reporter
;
492 this.resolver
= resolver
;
495 validationStack
= new LinkedList();
497 idrefs
= new HashSet();
499 String debug
= System
.getProperty("gnu.xml.debug.input");
504 File file
= File
.createTempFile(debug
, ".xml");
505 reader
= new TeeReader(reader
, new FileWriter(file
));
507 catch (IOException e
)
509 RuntimeException e2
= new RuntimeException();
514 systemId
= canonicalize(systemId
);
515 pushInput(new Input(null, reader
, null, systemId
, null, null, false, true));
518 // -- NamespaceContext --
520 public String
getNamespaceURI(String prefix
)
522 if (XMLConstants
.XML_NS_PREFIX
.equals(prefix
))
523 return XMLConstants
.XML_NS_URI
;
524 if (XMLConstants
.XMLNS_ATTRIBUTE
.equals(prefix
))
525 return XMLConstants
.XMLNS_ATTRIBUTE_NS_URI
;
526 for (Iterator i
= namespaces
.iterator(); i
.hasNext(); )
528 LinkedHashMap ctx
= (LinkedHashMap
) i
.next();
529 String namespaceURI
= (String
) ctx
.get(prefix
);
530 if (namespaceURI
!= null)
536 public String
getPrefix(String namespaceURI
)
538 if (XMLConstants
.XML_NS_URI
.equals(namespaceURI
))
539 return XMLConstants
.XML_NS_PREFIX
;
540 if (XMLConstants
.XMLNS_ATTRIBUTE_NS_URI
.equals(namespaceURI
))
541 return XMLConstants
.XMLNS_ATTRIBUTE
;
542 for (Iterator i
= namespaces
.iterator(); i
.hasNext(); )
544 LinkedHashMap ctx
= (LinkedHashMap
) i
.next();
545 if (ctx
.containsValue(namespaceURI
))
547 for (Iterator j
= ctx
.entrySet().iterator(); j
.hasNext(); )
549 Map
.Entry entry
= (Map
.Entry
) i
.next();
550 String uri
= (String
) entry
.getValue();
551 if (uri
.equals(namespaceURI
))
552 return (String
) entry
.getKey();
559 public Iterator
getPrefixes(String namespaceURI
)
561 if (XMLConstants
.XML_NS_URI
.equals(namespaceURI
))
562 return Collections
.singleton(XMLConstants
.XML_NS_PREFIX
).iterator();
563 if (XMLConstants
.XMLNS_ATTRIBUTE_NS_URI
.equals(namespaceURI
))
564 return Collections
.singleton(XMLConstants
.XMLNS_ATTRIBUTE
).iterator();
565 LinkedList acc
= new LinkedList();
566 for (Iterator i
= namespaces
.iterator(); i
.hasNext(); )
568 LinkedHashMap ctx
= (LinkedHashMap
) i
.next();
569 if (ctx
.containsValue(namespaceURI
))
571 for (Iterator j
= ctx
.entrySet().iterator(); j
.hasNext(); )
573 Map
.Entry entry
= (Map
.Entry
) i
.next();
574 String uri
= (String
) entry
.getValue();
575 if (uri
.equals(namespaceURI
))
576 acc
.add(entry
.getKey());
580 return acc
.iterator();
583 // -- XMLStreamReader --
586 throws XMLStreamException
596 validationStack
= null;
601 public NamespaceContext
getNamespaceContext()
606 public int getAttributeCount()
611 public String
getAttributeLocalName(int index
)
613 Attribute a
= (Attribute
) attrs
.get(index
);
617 public String
getAttributeNamespace(int index
)
619 String prefix
= getAttributePrefix(index
);
620 return getNamespaceURI(prefix
);
623 public String
getAttributePrefix(int index
)
625 Attribute a
= (Attribute
) attrs
.get(index
);
629 public QName
getAttributeName(int index
)
631 Attribute a
= (Attribute
) attrs
.get(index
);
632 String namespaceURI
= getNamespaceURI(a
.prefix
);
633 return new QName(namespaceURI
, a
.localName
, a
.prefix
);
636 public String
getAttributeType(int index
)
638 Attribute a
= (Attribute
) attrs
.get(index
);
642 private String
getAttributeType(String elementName
, String attName
)
646 AttributeDecl att
= doctype
.getAttributeDecl(elementName
, attName
);
653 public String
getAttributeValue(int index
)
655 Attribute a
= (Attribute
) attrs
.get(index
);
659 public String
getAttributeValue(String namespaceURI
, String localName
)
661 for (Iterator i
= attrs
.iterator(); i
.hasNext(); )
663 Attribute a
= (Attribute
) i
.next();
664 if (a
.localName
.equals(localName
))
666 String uri
= getNamespaceURI(a
.prefix
);
667 if ((uri
== null && namespaceURI
== null) ||
668 (uri
!= null && uri
.equals(namespaceURI
)))
675 boolean isAttributeDeclared(int index
)
679 Attribute a
= (Attribute
) attrs
.get(index
);
680 String qn
= ("".equals(a
.prefix
)) ? a
.localName
:
681 a
.prefix
+ ":" + a
.localName
;
682 String elementName
= buf
.toString();
683 return doctype
.isAttributeDeclared(elementName
, qn
);
686 public String
getCharacterEncodingScheme()
691 public String
getElementText()
692 throws XMLStreamException
694 if (event
!= XMLStreamConstants
.START_ELEMENT
)
695 throw new XMLStreamException("current event must be START_ELEMENT");
696 CPStringBuilder elementText
= new CPStringBuilder();
697 int depth
= stack
.size();
698 while (event
!= XMLStreamConstants
.END_ELEMENT
|| stack
.size() > depth
)
702 case XMLStreamConstants
.CHARACTERS
:
703 case XMLStreamConstants
.SPACE
:
704 elementText
.append(buf
.toString());
707 return elementText
.toString();
710 public String
getEncoding()
712 return (input
.inputEncoding
== null) ?
"UTF-8" : input
.inputEncoding
;
715 public int getEventType()
720 public String
getLocalName()
724 case XMLStreamConstants
.START_ELEMENT
:
725 case XMLStreamConstants
.END_ELEMENT
:
726 String qName
= buf
.toString();
727 int ci
= qName
.indexOf(':');
728 String localName
= (ci
== -1) ? qName
: qName
.substring(ci
+ 1);
730 localName
= localName
.intern();
737 public Location
getLocation()
742 public QName
getName()
746 case XMLStreamConstants
.START_ELEMENT
:
747 case XMLStreamConstants
.END_ELEMENT
:
748 String qName
= buf
.toString();
749 int ci
= qName
.indexOf(':');
750 String localName
= (ci
== -1) ? qName
: qName
.substring(ci
+ 1);
752 localName
= localName
.intern();
753 String prefix
= (ci
== -1) ?
754 (namespaceAware ? XMLConstants
.DEFAULT_NS_PREFIX
: null) :
755 qName
.substring(0, ci
);
756 if (stringInterning
&& prefix
!= null)
757 prefix
= prefix
.intern();
758 String namespaceURI
= getNamespaceURI(prefix
);
759 return new QName(namespaceURI
, localName
, prefix
);
765 public int getNamespaceCount()
767 if (!namespaceAware
|| namespaces
.isEmpty())
771 case XMLStreamConstants
.START_ELEMENT
:
772 case XMLStreamConstants
.END_ELEMENT
:
773 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
780 public String
getNamespacePrefix(int index
)
782 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
784 for (Iterator i
= ctx
.keySet().iterator(); i
.hasNext(); )
786 String prefix
= (String
) i
.next();
787 if (count
++ == index
)
793 public String
getNamespaceURI()
797 case XMLStreamConstants
.START_ELEMENT
:
798 case XMLStreamConstants
.END_ELEMENT
:
799 String qName
= buf
.toString();
800 int ci
= qName
.indexOf(':');
803 String prefix
= qName
.substring(0, ci
);
804 return getNamespaceURI(prefix
);
810 public String
getNamespaceURI(int index
)
812 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
814 for (Iterator i
= ctx
.values().iterator(); i
.hasNext(); )
816 String uri
= (String
) i
.next();
817 if (count
++ == index
)
823 public String
getPIData()
828 public String
getPITarget()
833 public String
getPrefix()
837 case XMLStreamConstants
.START_ELEMENT
:
838 case XMLStreamConstants
.END_ELEMENT
:
839 String qName
= buf
.toString();
840 int ci
= qName
.indexOf(':');
841 String prefix
= (ci
== -1) ?
842 (namespaceAware ? XMLConstants
.DEFAULT_NS_PREFIX
: null) :
843 qName
.substring(0, ci
);
844 if (stringInterning
&& prefix
!= null)
845 prefix
= prefix
.intern();
852 public Object
getProperty(String name
)
853 throws IllegalArgumentException
856 throw new IllegalArgumentException("name is null");
857 if (XMLInputFactory
.ALLOCATOR
.equals(name
))
859 if (XMLInputFactory
.IS_COALESCING
.equals(name
))
860 return coalescing ? Boolean
.TRUE
: Boolean
.FALSE
;
861 if (XMLInputFactory
.IS_NAMESPACE_AWARE
.equals(name
))
862 return namespaceAware ? Boolean
.TRUE
: Boolean
.FALSE
;
863 if (XMLInputFactory
.IS_REPLACING_ENTITY_REFERENCES
.equals(name
))
864 return replaceERefs ? Boolean
.TRUE
: Boolean
.FALSE
;
865 if (XMLInputFactory
.IS_SUPPORTING_EXTERNAL_ENTITIES
.equals(name
))
866 return externalEntities ? Boolean
.TRUE
: Boolean
.FALSE
;
867 if (XMLInputFactory
.IS_VALIDATING
.equals(name
))
868 return Boolean
.FALSE
;
869 if (XMLInputFactory
.REPORTER
.equals(name
))
871 if (XMLInputFactory
.RESOLVER
.equals(name
))
873 if (XMLInputFactory
.SUPPORT_DTD
.equals(name
))
874 return supportDTD ? Boolean
.TRUE
: Boolean
.FALSE
;
875 if ("gnu.xml.stream.stringInterning".equals(name
))
876 return stringInterning ? Boolean
.TRUE
: Boolean
.FALSE
;
877 if ("gnu.xml.stream.xmlBase".equals(name
))
878 return baseAware ? Boolean
.TRUE
: Boolean
.FALSE
;
879 if ("gnu.xml.stream.baseURI".equals(name
))
884 public String
getText()
886 return buf
.toString();
889 public char[] getTextCharacters()
891 return buf
.toString().toCharArray();
894 public int getTextCharacters(int sourceStart
, char[] target
,
895 int targetStart
, int length
)
896 throws XMLStreamException
898 length
= Math
.min(sourceStart
+ buf
.length(), length
);
899 int sourceEnd
= sourceStart
+ length
;
900 buf
.getChars(sourceStart
, sourceEnd
, target
, targetStart
);
904 public int getTextLength()
909 public int getTextStart()
914 public String
getVersion()
916 return (xmlVersion
== null) ?
"1.0" : xmlVersion
;
919 public boolean hasName()
923 case XMLStreamConstants
.START_ELEMENT
:
924 case XMLStreamConstants
.END_ELEMENT
:
931 public boolean hasText()
935 case XMLStreamConstants
.CHARACTERS
:
936 case XMLStreamConstants
.SPACE
:
943 public boolean isAttributeSpecified(int index
)
945 Attribute a
= (Attribute
) attrs
.get(index
);
949 public boolean isCharacters()
951 return (event
== XMLStreamConstants
.CHARACTERS
);
954 public boolean isEndElement()
956 return (event
== XMLStreamConstants
.END_ELEMENT
);
959 public boolean isStandalone()
961 return Boolean
.TRUE
.equals(xmlStandalone
);
964 public boolean isStartElement()
966 return (event
== XMLStreamConstants
.START_ELEMENT
);
969 public boolean isWhiteSpace()
971 return (event
== XMLStreamConstants
.SPACE
);
975 throws XMLStreamException
981 case XMLStreamConstants
.START_ELEMENT
:
982 case XMLStreamConstants
.END_ELEMENT
:
983 case XMLStreamConstants
.CHARACTERS
:
984 case XMLStreamConstants
.SPACE
:
985 case XMLStreamConstants
.COMMENT
:
986 case XMLStreamConstants
.PROCESSING_INSTRUCTION
:
989 throw new XMLStreamException("Unexpected event type: " + event
);
992 while (event
!= XMLStreamConstants
.START_ELEMENT
&&
993 event
!= XMLStreamConstants
.END_ELEMENT
);
997 public void require(int type
, String namespaceURI
, String localName
)
998 throws XMLStreamException
1001 throw new XMLStreamException("Current event type is " + event
);
1002 if (event
== XMLStreamConstants
.START_ELEMENT
||
1003 event
== XMLStreamConstants
.END_ELEMENT
)
1005 String ln
= getLocalName();
1006 if (!ln
.equals(localName
))
1007 throw new XMLStreamException("Current local-name is " + ln
);
1008 String uri
= getNamespaceURI();
1009 if ((uri
== null && namespaceURI
!= null) ||
1010 (uri
!= null && !uri
.equals(namespaceURI
)))
1011 throw new XMLStreamException("Current namespace URI is " + uri
);
1015 public boolean standaloneSet()
1017 return (xmlStandalone
!= null);
1020 public boolean hasNext()
1021 throws XMLStreamException
1023 return (event
!= XMLStreamConstants
.END_DOCUMENT
&& event
!= -1);
1027 throws XMLStreamException
1029 if (event
== XMLStreamConstants
.END_ELEMENT
)
1031 // Pop namespace context
1032 if (namespaceAware
&& !namespaces
.isEmpty())
1033 namespaces
.removeFirst();
1035 if (baseAware
&& !bases
.isEmpty())
1036 bases
.removeFirst();
1038 if (!startEntityStack
.isEmpty())
1040 String entityName
= (String
) startEntityStack
.removeFirst();
1042 buf
.append(entityName
);
1043 event
= START_ENTITY
;
1044 return extendedEventTypes ? event
: next();
1046 else if (!endEntityStack
.isEmpty())
1048 String entityName
= (String
) endEntityStack
.removeFirst();
1050 buf
.append(entityName
);
1052 return extendedEventTypes ? event
: next();
1056 if (!input
.initialized
)
1061 if (tryRead(TEST_END_ELEMENT
))
1064 if (stack
.isEmpty())
1066 event
= XMLStreamConstants
.END_ELEMENT
;
1068 else if (tryRead(TEST_COMMENT
))
1071 event
= XMLStreamConstants
.COMMENT
;
1073 else if (tryRead(TEST_PI
))
1076 event
= XMLStreamConstants
.PROCESSING_INSTRUCTION
;
1078 else if (tryRead(TEST_CDATA
))
1081 event
= XMLStreamConstants
.CDATA
;
1083 else if (tryRead(TEST_START_ELEMENT
))
1085 state
= readStartElement();
1086 event
= XMLStreamConstants
.START_ELEMENT
;
1090 // Check for character reference or predefined entity
1093 if (c
== 0x26) // '&'
1096 if (c
== 0x23) // '#'
1099 event
= readCharData(null);
1107 String ref
= buf
.toString();
1108 String text
= (String
) PREDEFINED_ENTITIES
.get(ref
);
1111 event
= readCharData(text
);
1113 else if (replaceERefs
&& !isUnparsedEntity(ref
))
1115 // this will report a start-entity event
1116 boolean external
= false;
1117 if (doctype
!= null)
1119 Object entity
= doctype
.getEntity(ref
);
1120 if (entity
instanceof ExternalIds
)
1123 expandEntity(ref
, false, external
);
1128 event
= XMLStreamConstants
.ENTITY_REFERENCE
;
1135 event
= readCharData(null);
1136 if (validating
&& doctype
!= null)
1137 validatePCData(buf
.toString());
1142 String elementName
= (String
) stack
.removeLast();
1144 buf
.append(elementName
);
1145 state
= stack
.isEmpty() ? MISC
: CONTENT
;
1146 event
= XMLStreamConstants
.END_ELEMENT
;
1147 if (validating
&& doctype
!= null)
1148 endElementValidationHook();
1150 case INIT
: // XMLDecl?
1151 if (tryRead(TEST_XML_DECL
))
1153 input
.finalizeEncoding();
1154 event
= XMLStreamConstants
.START_DOCUMENT
;
1157 case PROLOG
: // Misc* (doctypedecl Misc*)?
1159 if (doctype
== null && tryRead(TEST_DOCTYPE_DECL
))
1162 event
= XMLStreamConstants
.DTD
;
1164 else if (tryRead(TEST_COMMENT
))
1167 event
= XMLStreamConstants
.COMMENT
;
1169 else if (tryRead(TEST_PI
))
1172 event
= XMLStreamConstants
.PROCESSING_INSTRUCTION
;
1174 else if (tryRead(TEST_START_ELEMENT
))
1176 state
= readStartElement();
1177 event
= XMLStreamConstants
.START_ELEMENT
;
1182 error("no root element: U+" + Integer
.toHexString(c
));
1185 case MISC
: // Comment | PI | S
1187 if (tryRead(TEST_COMMENT
))
1190 event
= XMLStreamConstants
.COMMENT
;
1192 else if (tryRead(TEST_PI
))
1195 event
= XMLStreamConstants
.PROCESSING_INSTRUCTION
;
1199 if (event
== XMLStreamConstants
.END_DOCUMENT
)
1200 throw new NoSuchElementException();
1203 error("Only comments and PIs may appear after " +
1204 "the root element");
1205 event
= XMLStreamConstants
.END_DOCUMENT
;
1213 catch (IOException e
)
1215 XMLStreamException e2
= new XMLStreamException();
1224 * Returns the current element name.
1226 String
getCurrentElement()
1228 return (String
) stack
.getLast();
1233 private void mark(int limit
)
1239 private void reset()
1248 return input
.read();
1251 private int read(int[] b
, int off
, int len
)
1254 return input
.read(b
, off
, len
);
1258 * Parsed character read.
1260 private int readCh()
1261 throws IOException
, XMLStreamException
1264 if (expandPE
&& c
== 0x25) // '%'
1267 error("PE reference within decl in internal subset.");
1268 expandPEReference();
1275 * Reads the next character, ensuring it is the character specified.
1276 * @param delim the character to match
1277 * @exception XMLStreamException if the next character is not the
1280 private void require(char delim
)
1281 throws IOException
, XMLStreamException
1288 error("required character (got U+" + Integer
.toHexString(c
) + ")",
1289 new Character(delim
));
1294 * Reads the next few characters, ensuring they match the string specified.
1295 * @param delim the string to match
1296 * @exception XMLStreamException if the next characters do not match the
1299 private void require(String delim
)
1300 throws IOException
, XMLStreamException
1302 char[] chars
= delim
.toCharArray();
1303 int len
= chars
.length
;
1308 int l2
= read(tmpBuf
, off
, len
- off
);
1312 error("EOF before required string", delim
);
1317 for (int i
= 0; i
< chars
.length
; i
++)
1319 if (chars
[i
] != tmpBuf
[i
])
1322 error("required string", delim
);
1328 * Try to read a single character. On failure, reset the stream.
1329 * @param delim the character to test
1330 * @return true if the character matched delim, false otherwise.
1332 private boolean tryRead(char delim
)
1333 throws IOException
, XMLStreamException
1346 * Tries to read the specified characters.
1347 * If successful, the stream is positioned after the last character,
1348 * otherwise it is reset.
1349 * @param test the string to test
1350 * @return true if the characters matched the test string, false otherwise.
1352 private boolean tryRead(String test
)
1355 char[] chars
= test
.toCharArray();
1356 int len
= chars
.length
;
1359 int l2
= read(tmpBuf
, 0, len
);
1366 // check the characters we received first before doing additional reads
1367 for (int i
= 0; i
< count
; i
++)
1369 if (chars
[i
] != tmpBuf
[i
])
1384 tmpBuf
[count
] = (char) c
;
1385 // check each character as it is read
1386 if (chars
[count
] != tmpBuf
[count
])
1397 * Reads characters until the specified test string is encountered.
1398 * @param delim the string delimiting the end of the characters
1400 private void readUntil(String delim
)
1401 throws IOException
, XMLStreamException
1403 int startLine
= input
.line
;
1406 while (!tryRead(delim
))
1410 throw new EOFException();
1411 else if (input
.xml11
)
1413 if (!isXML11Char(c
) || isXML11RestrictedChar(c
))
1414 error("illegal XML 1.1 character",
1415 "U+" + Integer
.toHexString(c
));
1417 else if (!isChar(c
))
1418 error("illegal XML character",
1419 "U+" + Integer
.toHexString(c
));
1420 buf
.append(Character
.toChars(c
));
1423 catch (EOFException e
)
1425 error("end of input while looking for delimiter "+
1426 "(started on line " + startLine
+ ')', delim
);
1431 * Reads any whitespace characters.
1432 * @return true if whitespace characters were read, false otherwise
1434 private boolean tryWhitespace()
1435 throws IOException
, XMLStreamException
1438 boolean ret
= false;
1443 while (c
== -1 && inputStack
.size() > 1)
1448 white
= (c
== 0x20 || c
== 0x09 || c
== 0x0a || c
== 0x0d);
1458 * Skip over any whitespace characters.
1460 private void skipWhitespace()
1461 throws IOException
, XMLStreamException
1468 while (c
== -1 && inputStack
.size() > 1)
1473 white
= (c
== 0x20 || c
== 0x09 || c
== 0x0a || c
== 0x0d);
1480 * Try to read as many whitespace characters as are available.
1481 * @exception XMLStreamException if no whitespace characters were seen
1483 private void requireWhitespace()
1484 throws IOException
, XMLStreamException
1486 if (!tryWhitespace())
1487 error("whitespace required");
1491 * Returns the current base URI for resolving external entities.
1497 for (Iterator i
= bases
.iterator(); i
.hasNext(); )
1499 String base
= (String
) i
.next();
1504 return input
.systemId
;
1508 * Push the specified text input source.
1510 private void pushInput(String name
, String text
, boolean report
,
1512 throws IOException
, XMLStreamException
1514 // Check for recursion
1515 if (name
!= null && !"".equals(name
))
1517 for (Iterator i
= inputStack
.iterator(); i
.hasNext(); )
1519 Input ctx
= (Input
) i
.next();
1520 if (name
.equals(ctx
.name
))
1521 error("entities may not be self-recursive", name
);
1526 pushInput(new Input(null, new StringReader(text
), input
.publicId
,
1527 input
.systemId
, name
, input
.inputEncoding
, report
,
1532 * Push the specified external input source.
1534 private void pushInput(String name
, ExternalIds ids
, boolean report
,
1536 throws IOException
, XMLStreamException
1538 if (!externalEntities
)
1540 String url
= canonicalize(absolutize(input
.systemId
, ids
.systemId
));
1541 // Check for recursion
1542 for (Iterator i
= inputStack
.iterator(); i
.hasNext(); )
1544 Input ctx
= (Input
) i
.next();
1545 if (url
.equals(ctx
.systemId
))
1546 error("entities may not be self-recursive", url
);
1547 if (name
!= null && !"".equals(name
) && name
.equals(ctx
.name
))
1548 error("entities may not be self-recursive", name
);
1550 if (name
== null || "".equals(name
))
1552 InputStream in
= null;
1553 if (resolver
!= null)
1555 Object obj
= resolver
.resolveEntity(ids
.publicId
, url
, getXMLBase(),
1557 if (obj
instanceof InputStream
)
1558 in
= (InputStream
) obj
;
1563 error("unable to resolve external entity",
1564 (ids
.systemId
!= null) ? ids
.systemId
: ids
.publicId
);
1565 pushInput(new Input(in
, null, ids
.publicId
, url
, name
, null, report
,
1568 if (tryRead(TEST_XML_DECL
))
1570 input
.finalizeEncoding();
1574 * Push the specified input source (general entity) onto the input stack.
1576 private void pushInput(Input input
)
1579 startEntityStack
.addFirst(input
.name
);
1580 inputStack
.addLast(input
);
1581 if (this.input
!= null)
1582 input
.xml11
= this.input
.xml11
;
1587 * Returns a canonicalized version of the specified URL.
1588 * This is largely to work around a problem with the specification of
1591 static String
canonicalize(String url
)
1595 if (url
.startsWith("file:") && !url
.startsWith("file://"))
1596 url
= "file://" + url
.substring(5);
1601 * "Absolutize" a URL. This resolves a relative URL into an absolute one.
1602 * @param base the current base URL
1603 * @param href the (absolute or relative) URL to resolve
1605 public static String
absolutize(String base
, String href
)
1609 int ci
= href
.indexOf(':');
1610 if (ci
> 1 && isURLScheme(href
.substring(0, ci
)))
1612 // href is absolute already
1619 int i
= base
.lastIndexOf('/');
1621 base
= base
.substring(0, i
+ 1);
1625 if ("".equals(base
))
1627 // assume file URL relative to current directory
1628 base
= System
.getProperty("user.dir");
1629 if (base
.charAt(0) == '/')
1630 base
= base
.substring(1);
1631 base
= "file:///" + base
.replace(File
.separatorChar
, '/');
1632 if (!base
.endsWith("/"))
1635 // We can't use java.net.URL here to do the parsing, as it searches for
1636 // a protocol handler. A protocol handler may not be registered for the
1637 // URL scheme here. Do it manually.
1639 // Set aside scheme and host portion of base URL
1640 String basePrefix
= null;
1641 ci
= base
.indexOf(':');
1642 if (ci
> 1 && isURLScheme(base
.substring(0, ci
)))
1644 if (base
.length() > (ci
+ 3) &&
1645 base
.charAt(ci
+ 1) == '/' &&
1646 base
.charAt(ci
+ 2) == '/')
1648 int si
= base
.indexOf('/', ci
+ 3);
1653 basePrefix
= base
.substring(0, si
);
1654 base
= base
.substring(si
);
1660 if (base
== null) // unknown or malformed base URL, use href
1662 if (href
.startsWith("/")) // absolute href pathname
1663 return (basePrefix
== null) ? href
: basePrefix
+ href
;
1664 // relative href pathname
1665 if (!base
.endsWith("/"))
1667 int lsi
= base
.lastIndexOf('/');
1671 base
= base
.substring(0, lsi
+ 1);
1673 while (href
.startsWith("../") || href
.startsWith("./"))
1675 if (href
.startsWith("../"))
1677 // strip last path component from base
1678 int lsi
= base
.lastIndexOf('/', base
.length() - 2);
1680 base
= base
.substring(0, lsi
+ 1);
1681 href
= href
.substring(3); // strip ../ prefix
1685 href
= href
.substring(2); // strip ./ prefix
1688 return (basePrefix
== null) ? base
+ href
: basePrefix
+ base
+ href
;
1692 * Indicates whether the specified characters match the scheme portion of
1694 * @see RFC 1738 section 2.1
1696 private static boolean isURLScheme(String text
)
1698 int len
= text
.length();
1699 for (int i
= 0; i
< len
; i
++)
1701 char c
= text
.charAt(i
);
1702 if (c
== '+' || c
== '.' || c
== '-')
1704 if (c
< 65 || (c
> 90 && c
< 97) || c
> 122)
1711 * Returns an input stream for the given URL.
1713 static InputStream
resolve(String url
)
1718 return new URL(url
).openStream();
1720 catch (MalformedURLException e
)
1724 catch (IOException e
)
1726 IOException e2
= new IOException("error resolving " + url
);
1733 * Pops the current input source (general entity) off the stack.
1735 private void popInput()
1737 Input old
= (Input
) inputStack
.removeLast();
1739 endEntityStack
.addFirst(old
.name
);
1740 input
= (Input
) inputStack
.getLast();
1744 * Parse an entity text declaration.
1746 private void readTextDecl()
1747 throws IOException
, XMLStreamException
1749 final int flags
= LIT_DISABLE_CREF
| LIT_DISABLE_PE
| LIT_DISABLE_EREF
;
1750 requireWhitespace();
1751 if (tryRead("version"))
1754 String v
= readLiteral(flags
, false);
1755 if ("1.0".equals(v
))
1756 input
.xml11
= false;
1757 else if ("1.1".equals(v
))
1759 Input i1
= (Input
) inputStack
.getFirst();
1761 error("external entity specifies later version number");
1765 throw new XMLStreamException("illegal XML version: " + v
);
1766 requireWhitespace();
1768 require("encoding");
1770 String enc
= readLiteral(flags
, false);
1773 input
.setInputEncoding(enc
);
1777 * Parse the XML declaration.
1779 private void readXMLDecl()
1780 throws IOException
, XMLStreamException
1782 final int flags
= LIT_DISABLE_CREF
| LIT_DISABLE_PE
| LIT_DISABLE_EREF
;
1784 requireWhitespace();
1787 xmlVersion
= readLiteral(flags
, false);
1788 if ("1.0".equals(xmlVersion
))
1789 input
.xml11
= false;
1790 else if ("1.1".equals(xmlVersion
))
1793 throw new XMLStreamException("illegal XML version: " + xmlVersion
);
1795 boolean white
= tryWhitespace();
1797 if (tryRead("encoding"))
1800 error("whitespace required before 'encoding='");
1802 xmlEncoding
= readLiteral(flags
, false);
1803 white
= tryWhitespace();
1806 if (tryRead("standalone"))
1809 error("whitespace required before 'standalone='");
1811 String standalone
= readLiteral(flags
, false);
1812 if ("yes".equals(standalone
))
1813 xmlStandalone
= Boolean
.TRUE
;
1814 else if ("no".equals(standalone
))
1815 xmlStandalone
= Boolean
.FALSE
;
1817 error("standalone flag must be 'yes' or 'no'", standalone
);
1822 if (xmlEncoding
!= null)
1823 input
.setInputEncoding(xmlEncoding
);
1827 * Parse the DOCTYPE declaration.
1829 private void readDoctypeDecl()
1830 throws IOException
, XMLStreamException
1833 error("parser was configured not to support DTDs");
1834 requireWhitespace();
1835 String rootName
= readNmtoken(true);
1837 ExternalIds ids
= readExternalIds(false, true);
1839 this.new Doctype(rootName
, ids
.publicId
, ids
.systemId
);
1841 // Parse internal subset first
1853 readMarkupdecl(false);
1859 // Parse external subset
1860 if (ids
.systemId
!= null && externalEntities
)
1862 pushInput("", ">", false, false);
1863 pushInput("[dtd]", ids
, true, true);
1864 // loop until we get back to ">"
1872 if (c
== 0x3e) // '>'
1880 readMarkupdecl(true);
1884 if (inputStack
.size() != 2)
1885 error("external subset has unmatched '>'");
1892 // Make rootName available for reading
1894 buf
.append(rootName
);
1898 * Checks the well-formedness of the DTD.
1900 private void checkDoctype()
1901 throws XMLStreamException
1903 // TODO check entity recursion
1907 * Parse the markupdecl production.
1909 private void readMarkupdecl(boolean inExternalSubset
)
1910 throws IOException
, XMLStreamException
1912 boolean saved
= expandPE
;
1917 if (tryRead(TEST_ELEMENT_DECL
))
1922 else if (tryRead(TEST_ATTLIST_DECL
))
1927 else if (tryRead(TEST_ENTITY_DECL
))
1930 readEntityDecl(inExternalSubset
);
1932 else if (tryRead(TEST_NOTATION_DECL
))
1935 readNotationDecl(inExternalSubset
);
1937 else if (tryRead(TEST_PI
))
1942 else if (tryRead(TEST_COMMENT
))
1947 else if (tryRead("<!["))
1949 // conditional section
1951 if (inputStack
.size() < 2)
1952 error("conditional sections illegal in internal subset");
1954 if (tryRead("INCLUDE"))
1959 while (!tryRead("]]>"))
1961 readMarkupdecl(inExternalSubset
);
1965 else if (tryRead("IGNORE"))
1970 for (int nesting
= 1; nesting
> 0; )
1984 throw new EOFException();
1990 error("conditional section must begin with INCLUDE or IGNORE");
1993 error("expected markup declaration");
1997 * Parse the elementdecl production.
1999 private void readElementDecl()
2000 throws IOException
, XMLStreamException
2002 requireWhitespace();
2003 boolean saved
= expandPE
;
2004 expandPE
= (inputStack
.size() > 1);
2005 String name
= readNmtoken(true);
2007 requireWhitespace();
2008 readContentspec(name
);
2014 * Parse the contentspec production.
2016 private void readContentspec(String elementName
)
2017 throws IOException
, XMLStreamException
2019 if (tryRead("EMPTY"))
2020 doctype
.addElementDecl(elementName
, "EMPTY", new EmptyContentModel());
2021 else if (tryRead("ANY"))
2022 doctype
.addElementDecl(elementName
, "ANY", new AnyContentModel());
2026 CPStringBuilder acc
= new CPStringBuilder();
2030 if (tryRead("#PCDATA"))
2033 acc
.append("#PCDATA");
2034 MixedContentModel mm
= new MixedContentModel();
2048 while (!tryRead(")"))
2053 String name
= readNmtoken(true);
2065 model
= readElements(acc
);
2066 doctype
.addElementDecl(elementName
, acc
.toString(), model
);
2071 * Parses an element content model.
2073 private ElementContentModel
readElements(CPStringBuilder acc
)
2074 throws IOException
, XMLStreamException
2077 ElementContentModel model
= new ElementContentModel();
2079 // Parse first content particle
2081 model
.addContentParticle(readContentParticle(acc
));
2111 return model
; // done
2117 acc
.append(Character
.toChars(c
));
2120 error("bad separator in content model",
2121 "U+" + Integer
.toHexString(c
));
2124 // Parse subsequent content particles
2128 model
.addContentParticle(readContentParticle(acc
));
2131 if (c
== 0x29) // ')'
2136 else if (c
!= separator
)
2138 error("bad separator in content model",
2139 "U+" + Integer
.toHexString(c
));
2145 // Check for occurrence indicator
2172 * Parse a cp production.
2174 private ContentParticle
readContentParticle(CPStringBuilder acc
)
2175 throws IOException
, XMLStreamException
2177 ContentParticle cp
= new ContentParticle();
2181 cp
.content
= readElements(acc
);
2185 String name
= readNmtoken(true);
2215 * Parse an attribute-list definition.
2217 private void readAttlistDecl()
2218 throws IOException
, XMLStreamException
2220 requireWhitespace();
2221 boolean saved
= expandPE
;
2222 expandPE
= (inputStack
.size() > 1);
2223 String elementName
= readNmtoken(true);
2225 boolean white
= tryWhitespace();
2226 while (!tryRead('>'))
2229 error("whitespace required before attribute definition");
2230 readAttDef(elementName
);
2231 white
= tryWhitespace();
2236 * Parse a single attribute definition.
2238 private void readAttDef(String elementName
)
2239 throws IOException
, XMLStreamException
2241 String name
= readNmtoken(true);
2242 requireWhitespace();
2243 CPStringBuilder acc
= new CPStringBuilder();
2244 HashSet values
= new HashSet();
2245 String type
= readAttType(acc
, values
);
2248 if ("ID".equals(type
))
2250 // VC: One ID per Element Type
2251 for (Iterator i
= doctype
.attlistIterator(elementName
);
2254 Map
.Entry entry
= (Map
.Entry
) i
.next();
2255 AttributeDecl decl
= (AttributeDecl
) entry
.getValue();
2256 if ("ID".equals(decl
.type
))
2257 error("element types must not have more than one ID " +
2261 else if ("NOTATION".equals(type
))
2263 // VC: One Notation Per Element Type
2264 for (Iterator i
= doctype
.attlistIterator(elementName
);
2267 Map
.Entry entry
= (Map
.Entry
) i
.next();
2268 AttributeDecl decl
= (AttributeDecl
) entry
.getValue();
2269 if ("NOTATION".equals(decl
.type
))
2270 error("element types must not have more than one NOTATION " +
2273 // VC: No Notation on Empty Element
2274 ContentModel model
= doctype
.getElementModel(elementName
);
2275 if (model
!= null && model
.type
== ContentModel
.EMPTY
)
2276 error("attributes of type NOTATION must not be declared on an " +
2277 "element declared EMPTY");
2280 String enumer
= null;
2281 if ("ENUMERATION".equals(type
) || "NOTATION".equals(type
))
2282 enumer
= acc
.toString();
2285 requireWhitespace();
2286 readDefault(elementName
, name
, type
, enumer
, values
);
2290 * Parse an attribute type.
2292 private String
readAttType(CPStringBuilder acc
, HashSet values
)
2293 throws IOException
, XMLStreamException
2297 readEnumeration(false, acc
, values
);
2298 return "ENUMERATION";
2302 String typeString
= readNmtoken(true);
2303 if ("NOTATION".equals(typeString
))
2305 readNotationType(acc
, values
);
2308 else if ("CDATA".equals(typeString
) ||
2309 "ID".equals(typeString
) ||
2310 "IDREF".equals(typeString
) ||
2311 "IDREFS".equals(typeString
) ||
2312 "ENTITY".equals(typeString
) ||
2313 "ENTITIES".equals(typeString
) ||
2314 "NMTOKEN".equals(typeString
) ||
2315 "NMTOKENS".equals(typeString
))
2319 error("illegal attribute type", typeString
);
2326 * Parse an enumeration.
2328 private void readEnumeration(boolean isNames
, CPStringBuilder acc
,
2330 throws IOException
, XMLStreamException
2335 String token
= readNmtoken(isNames
);
2338 // subsequent tokens
2340 while (!tryRead(')'))
2345 token
= readNmtoken(isNames
);
2346 // VC: No Duplicate Tokens
2347 if (validating
&& values
.contains(token
))
2348 error("duplicate token", token
);
2357 * Parse a notation type for an attribute.
2359 private void readNotationType(CPStringBuilder acc
, HashSet values
)
2360 throws IOException
, XMLStreamException
2362 requireWhitespace();
2364 readEnumeration(true, acc
, values
);
2368 * Parse the default value for an attribute.
2370 private void readDefault(String elementName
, String name
,
2371 String type
, String enumeration
, HashSet values
)
2372 throws IOException
, XMLStreamException
2374 int valueType
= ATTRIBUTE_DEFAULT_SPECIFIED
;
2375 int flags
= LIT_ATTRIBUTE
;
2376 String value
= null, defaultType
= null;
2377 boolean saved
= expandPE
;
2379 if (!"CDATA".equals(type
))
2380 flags
|= LIT_NORMALIZE
;
2385 if (tryRead("FIXED"))
2387 defaultType
= "#FIXED";
2388 valueType
= ATTRIBUTE_DEFAULT_FIXED
;
2389 requireWhitespace();
2390 value
= readLiteral(flags
, false);
2392 else if (tryRead("REQUIRED"))
2394 defaultType
= "#REQUIRED";
2395 valueType
= ATTRIBUTE_DEFAULT_REQUIRED
;
2397 else if (tryRead("IMPLIED"))
2399 defaultType
= "#IMPLIED";
2400 valueType
= ATTRIBUTE_DEFAULT_IMPLIED
;
2403 error("illegal keyword for attribute default value");
2406 value
= readLiteral(flags
, false);
2410 if ("ID".equals(type
))
2412 // VC: Attribute Default Value Syntactically Correct
2413 if (value
!= null && !isNmtoken(value
, true))
2414 error("default value must match Name production", value
);
2415 // VC: ID Attribute Default
2416 if (valueType
!= ATTRIBUTE_DEFAULT_REQUIRED
&&
2417 valueType
!= ATTRIBUTE_DEFAULT_IMPLIED
)
2418 error("ID attributes must have a declared default of " +
2419 "#IMPLIED or #REQUIRED");
2421 else if (value
!= null)
2423 // VC: Attribute Default Value Syntactically Correct
2424 if ("IDREF".equals(type
) || "ENTITY".equals(type
))
2426 if (!isNmtoken(value
, true))
2427 error("default value must match Name production", value
);
2429 else if ("IDREFS".equals(type
) || "ENTITIES".equals(type
))
2431 StringTokenizer st
= new StringTokenizer(value
);
2432 while (st
.hasMoreTokens())
2434 String token
= st
.nextToken();
2435 if (!isNmtoken(token
, true))
2436 error("default value must match Name production", token
);
2439 else if ("NMTOKEN".equals(type
) || "ENUMERATION".equals(type
))
2441 if (!isNmtoken(value
, false))
2442 error("default value must match Nmtoken production", value
);
2444 else if ("NMTOKENS".equals(type
))
2446 StringTokenizer st
= new StringTokenizer(value
);
2447 while (st
.hasMoreTokens())
2449 String token
= st
.nextToken();
2450 if (!isNmtoken(token
, false))
2451 error("default value must match Nmtoken production",
2457 // Register attribute def
2458 AttributeDecl attribute
=
2459 new AttributeDecl(type
, value
, valueType
, enumeration
, values
,
2460 inputStack
.size() != 1);
2461 doctype
.addAttributeDecl(elementName
, name
, attribute
);
2465 * Parse the EntityDecl production.
2467 private void readEntityDecl(boolean inExternalSubset
)
2468 throws IOException
, XMLStreamException
2471 // Check if parameter entity
2472 boolean peFlag
= false;
2474 requireWhitespace();
2478 requireWhitespace();
2482 String name
= readNmtoken(true);
2483 if (name
.indexOf(':') != -1)
2484 error("illegal character ':' in entity name", name
);
2487 requireWhitespace();
2491 if (c
== 0x22 || c
== 0x27) // " | '
2493 // Internal entity replacement text
2494 String value
= readLiteral(flags
| LIT_DISABLE_EREF
, true);
2495 int ai
= value
.indexOf('&');
2498 int sci
= value
.indexOf(';', ai
);
2500 error("malformed reference in entity value", value
);
2501 String ref
= value
.substring(ai
+ 1, sci
);
2502 int[] cp
= UnicodeReader
.toCodePointArray(ref
);
2504 error("malformed reference in entity value", value
);
2505 if (cp
[0] == 0x23) // #
2508 error("malformed reference in entity value", value
);
2509 if (cp
[1] == 0x78) // 'x'
2512 error("malformed reference in entity value", value
);
2513 for (int i
= 2; i
< cp
.length
; i
++)
2517 (x
> 0x39 && x
< 0x41) ||
2518 (x
> 0x46 && x
< 0x61) ||
2520 error("malformed character reference in entity value",
2526 for (int i
= 1; i
< cp
.length
; i
++)
2529 if (x
< 0x30 || x
> 0x39)
2530 error("malformed character reference in entity value",
2537 if (!isNameStartCharacter(cp
[0], input
.xml11
))
2538 error("malformed reference in entity value", value
);
2539 for (int i
= 1; i
< cp
.length
; i
++)
2541 if (!isNameCharacter(cp
[i
], input
.xml11
))
2542 error("malformed reference in entity value", value
);
2545 ai
= value
.indexOf('&', sci
);
2547 doctype
.addEntityDecl(name
, value
, inExternalSubset
);
2551 ExternalIds ids
= readExternalIds(false, false);
2553 boolean white
= tryWhitespace();
2554 if (!peFlag
&& tryRead("NDATA"))
2557 error("whitespace required before NDATA");
2558 requireWhitespace();
2559 ids
.notationName
= readNmtoken(true);
2561 doctype
.addEntityDecl(name
, ids
, inExternalSubset
);
2569 * Parse the NotationDecl production.
2571 private void readNotationDecl(boolean inExternalSubset
)
2572 throws IOException
, XMLStreamException
2574 requireWhitespace();
2575 String notationName
= readNmtoken(true);
2576 if (notationName
.indexOf(':') != -1)
2577 error("illegal character ':' in notation name", notationName
);
2580 // VC: Unique Notation Name
2581 ExternalIds notation
= doctype
.getNotation(notationName
);
2582 if (notation
!= null)
2583 error("duplicate notation name", notationName
);
2585 requireWhitespace();
2586 ExternalIds ids
= readExternalIds(true, false);
2587 ids
.notationName
= notationName
;
2588 doctype
.addNotationDecl(notationName
, ids
, inExternalSubset
);
2594 * Parses a PUBLIC or SYSTEM external identifier and returns its {publicId, systemId} pair.
2596 private ExternalIds
readExternalIds(boolean inNotation
, boolean isSubset
)
2597 throws IOException
, XMLStreamException
2600 int flags
= LIT_DISABLE_CREF
| LIT_DISABLE_PE
| LIT_DISABLE_EREF
;
2601 ExternalIds ids
= new ExternalIds();
2603 if (tryRead("PUBLIC"))
2605 requireWhitespace();
2606 ids
.publicId
= readLiteral(LIT_NORMALIZE
| LIT_PUBID
| flags
, false);
2613 if (c
== 0x22 || c
== 0x27) // " | '
2615 String href
= readLiteral(flags
, false);
2616 ids
.systemId
= absolutize(input
.systemId
, href
);
2621 requireWhitespace();
2622 String href
= readLiteral(flags
, false);
2623 ids
.systemId
= absolutize(input
.systemId
, href
);
2625 // Check that the public ID contains only legal characters
2626 for (int i
= 0; i
< ids
.publicId
.length(); i
++)
2628 char d
= ids
.publicId
.charAt(i
);
2629 if (d
>= 'a' && d
<= 'z')
2631 if (d
>= 'A' && d
<= 'Z')
2633 if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d
) != -1)
2635 error("illegal PUBLIC id character",
2636 "U+" + Integer
.toHexString(d
));
2639 else if (tryRead("SYSTEM"))
2641 requireWhitespace();
2642 String href
= readLiteral(flags
, false);
2643 ids
.systemId
= absolutize(input
.systemId
, href
);
2647 error("missing SYSTEM or PUBLIC keyword");
2649 if (ids
.systemId
!= null && !inNotation
)
2651 if (ids
.systemId
.indexOf('#') != -1)
2652 error("SYSTEM id has a URI fragment", ids
.systemId
);
2658 * Parse the start of an element.
2659 * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
2661 private int readStartElement()
2662 throws IOException
, XMLStreamException
2664 // Read element name
2665 String elementName
= readNmtoken(true);
2667 // Push namespace context
2670 if (elementName
.charAt(0) == ':' ||
2671 elementName
.charAt(elementName
.length() - 1) == ':')
2672 error("not a QName", elementName
);
2673 namespaces
.addFirst(new LinkedHashMap());
2675 // Read element content
2676 boolean white
= tryWhitespace();
2679 while (c
!= 0x2f && c
!= 0x3e) // '/' | '>'
2684 error("need whitespace between attributes");
2685 readAttribute(elementName
);
2686 white
= tryWhitespace();
2690 // supply defaulted attributes
2691 if (doctype
!= null)
2693 for (Iterator i
= doctype
.attlistIterator(elementName
); i
.hasNext(); )
2695 Map
.Entry entry
= (Map
.Entry
) i
.next();
2696 String attName
= (String
) entry
.getKey();
2697 AttributeDecl decl
= (AttributeDecl
) entry
.getValue();
2700 switch (decl
.valueType
)
2702 case ATTRIBUTE_DEFAULT_REQUIRED
:
2703 // VC: Required Attribute
2704 if (decl
.value
== null && !attributeSpecified(attName
))
2705 error("value for " + attName
+ " attribute is required");
2707 case ATTRIBUTE_DEFAULT_FIXED
:
2708 // VC: Fixed Attribute Default
2709 for (Iterator j
= attrs
.iterator(); j
.hasNext(); )
2711 Attribute a
= (Attribute
) j
.next();
2712 if (attName
.equals(a
.name
) &&
2713 !decl
.value
.equals(a
.value
))
2714 error("value for " + attName
+ " attribute must be " +
2720 if (namespaceAware
&& attName
.equals("xmlns"))
2723 (LinkedHashMap
) namespaces
.getFirst();
2724 if (ctx
.containsKey(XMLConstants
.DEFAULT_NS_PREFIX
))
2725 continue; // namespace was specified
2727 else if (namespaceAware
&& attName
.startsWith("xmlns:"))
2730 (LinkedHashMap
) namespaces
.getFirst();
2731 if (ctx
.containsKey(attName
.substring(6)))
2732 continue; // namespace was specified
2734 else if (attributeSpecified(attName
))
2736 if (decl
.value
== null)
2738 // VC: Standalone Document Declaration
2739 if (validating
&& decl
.external
&& xmlStandalone
== Boolean
.TRUE
)
2740 error("standalone must be 'no' if attributes inherit values " +
2741 "from externally declared markup declarations");
2743 new Attribute(attName
, decl
.type
, false, decl
.value
);
2746 if (!addNamespace(attr
))
2755 String uri
= getAttributeValue(XMLConstants
.XML_NS_URI
, "base");
2756 String base
= getXMLBase();
2757 bases
.addFirst(absolutize(base
, uri
));
2761 // check prefix bindings
2762 int ci
= elementName
.indexOf(':');
2765 String prefix
= elementName
.substring(0, ci
);
2766 String uri
= getNamespaceURI(prefix
);
2768 error("unbound element prefix", prefix
);
2769 else if (input
.xml11
&& "".equals(uri
))
2770 error("XML 1.1 unbound element prefix", prefix
);
2772 for (Iterator i
= attrs
.iterator(); i
.hasNext(); )
2774 Attribute attr
= (Attribute
) i
.next();
2775 if (attr
.prefix
!= null &&
2776 !XMLConstants
.XMLNS_ATTRIBUTE
.equals(attr
.prefix
))
2778 String uri
= getNamespaceURI(attr
.prefix
);
2780 error("unbound attribute prefix", attr
.prefix
);
2781 else if (input
.xml11
&& "".equals(uri
))
2782 error("XML 1.1 unbound attribute prefix", attr
.prefix
);
2786 if (validating
&& doctype
!= null)
2788 validateStartElement(elementName
);
2789 currentContentModel
= doctype
.getElementModel(elementName
);
2790 if (currentContentModel
== null)
2791 error("no element declaration", elementName
);
2792 validationStack
.add(new LinkedList());
2794 // make element name available for read
2796 buf
.append(elementName
);
2797 // push element onto stack
2798 stack
.addLast(elementName
);
2805 return EMPTY_ELEMENT
;
2807 return -1; // to satisfy compiler
2811 * Indicates whether the specified attribute name was specified for the
2814 private boolean attributeSpecified(String attName
)
2816 for (Iterator j
= attrs
.iterator(); j
.hasNext(); )
2818 Attribute a
= (Attribute
) j
.next();
2819 if (attName
.equals(a
.name
))
2826 * Parse an attribute.
2828 private void readAttribute(String elementName
)
2829 throws IOException
, XMLStreamException
2831 // Read attribute name
2832 String attributeName
= readNmtoken(true);
2833 String type
= getAttributeType(elementName
, attributeName
);
2836 final int flags
= LIT_ATTRIBUTE
| LIT_ENTITY_REF
;
2837 String value
= (type
== null || "CDATA".equals(type
)) ?
2838 readLiteral(flags
, false) : readLiteral(flags
| LIT_NORMALIZE
, false);
2839 // add attribute event
2840 Attribute attr
= this.new Attribute(attributeName
, type
, true, value
);
2843 if (attributeName
.charAt(0) == ':' ||
2844 attributeName
.charAt(attributeName
.length() - 1) == ':')
2845 error("not a QName", attributeName
);
2846 else if (attributeName
.equals("xmlns"))
2848 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
2849 if (ctx
.containsKey(XMLConstants
.DEFAULT_NS_PREFIX
))
2850 error("duplicate default namespace");
2852 else if (attributeName
.startsWith("xmlns:"))
2854 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
2855 if (ctx
.containsKey(attributeName
.substring(6)))
2856 error("duplicate namespace", attributeName
.substring(6));
2858 else if (attrs
.contains(attr
))
2859 error("duplicate attribute", attributeName
);
2861 else if (attrs
.contains(attr
))
2862 error("duplicate attribute", attributeName
);
2863 if (validating
&& doctype
!= null)
2865 // VC: Attribute Value Type
2866 AttributeDecl decl
=
2867 doctype
.getAttributeDecl(elementName
, attributeName
);
2869 error("attribute must be declared", attributeName
);
2870 if ("ENUMERATION".equals(decl
.type
))
2873 if (!decl
.values
.contains(value
))
2874 error("value does not match enumeration " + decl
.enumeration
,
2877 else if ("ID".equals(decl
.type
))
2880 if (!isNmtoken(value
, true))
2881 error("ID values must match the Name production");
2882 if (ids
.contains(value
))
2883 error("Duplicate ID", value
);
2886 else if ("IDREF".equals(decl
.type
) || "IDREFS".equals(decl
.type
))
2888 StringTokenizer st
= new StringTokenizer(value
);
2889 while (st
.hasMoreTokens())
2891 String token
= st
.nextToken();
2893 if (!isNmtoken(token
, true))
2894 error("IDREF values must match the Name production");
2898 else if ("NMTOKEN".equals(decl
.type
) || "NMTOKENS".equals(decl
.type
))
2900 StringTokenizer st
= new StringTokenizer(value
);
2901 while (st
.hasMoreTokens())
2903 String token
= st
.nextToken();
2905 if (!isNmtoken(token
, false))
2906 error("NMTOKEN values must match the Nmtoken production");
2909 else if ("ENTITY".equals(decl
.type
))
2912 if (!isNmtoken(value
, true))
2913 error("ENTITY values must match the Name production");
2914 Object entity
= doctype
.getEntity(value
);
2915 if (entity
== null || !(entity
instanceof ExternalIds
) ||
2916 ((ExternalIds
) entity
).notationName
== null)
2917 error("ENTITY values must match the name of an unparsed " +
2918 "entity declared in the DTD");
2920 else if ("NOTATION".equals(decl
.type
))
2922 if (!decl
.values
.contains(value
))
2923 error("NOTATION values must match a declared notation name",
2925 // VC: Notation Attributes
2926 ExternalIds notation
= doctype
.getNotation(value
);
2927 if (notation
== null)
2928 error("NOTATION values must match the name of a notation " +
2929 "declared in the DTD", value
);
2934 if (!addNamespace(attr
))
2942 * Determines whether the specified attribute is a namespace declaration,
2943 * and adds it to the current namespace context if so. Returns false if
2944 * the attribute is an ordinary attribute.
2946 private boolean addNamespace(Attribute attr
)
2947 throws XMLStreamException
2949 if ("xmlns".equals(attr
.name
))
2951 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
2952 if (ctx
.get(XMLConstants
.DEFAULT_NS_PREFIX
) != null)
2953 error("Duplicate default namespace declaration");
2954 if (XMLConstants
.XML_NS_URI
.equals(attr
.value
))
2955 error("can't bind XML namespace");
2956 ctx
.put(XMLConstants
.DEFAULT_NS_PREFIX
, attr
.value
);
2959 else if ("xmlns".equals(attr
.prefix
))
2961 LinkedHashMap ctx
= (LinkedHashMap
) namespaces
.getFirst();
2962 if (ctx
.get(attr
.localName
) != null)
2963 error("Duplicate namespace declaration for prefix",
2965 if (XMLConstants
.XML_NS_PREFIX
.equals(attr
.localName
))
2967 if (!XMLConstants
.XML_NS_URI
.equals(attr
.value
))
2968 error("can't redeclare xml prefix");
2970 return false; // treat as attribute
2972 if (XMLConstants
.XML_NS_URI
.equals(attr
.value
))
2973 error("can't bind non-xml prefix to XML namespace");
2974 if (XMLConstants
.XMLNS_ATTRIBUTE
.equals(attr
.localName
))
2975 error("can't redeclare xmlns prefix");
2976 if (XMLConstants
.XMLNS_ATTRIBUTE_NS_URI
.equals(attr
.value
))
2977 error("can't bind non-xmlns prefix to XML Namespace namespace");
2978 if ("".equals(attr
.value
) && !input
.xml11
)
2979 error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2980 ctx
.put(attr
.localName
, attr
.value
);
2987 * Parse a closing tag.
2989 private void readEndElement()
2990 throws IOException
, XMLStreamException
2992 // pop element off stack
2993 String expected
= (String
) stack
.removeLast();
2997 // Make element name available
2999 buf
.append(expected
);
3000 if (validating
&& doctype
!= null)
3001 endElementValidationHook();
3005 * Validate the end of an element.
3006 * Called on an end-element or empty element if validating.
3008 private void endElementValidationHook()
3009 throws XMLStreamException
3011 validateEndElement();
3012 validationStack
.removeLast();
3013 if (stack
.isEmpty())
3014 currentContentModel
= null;
3017 String parent
= (String
) stack
.getLast();
3018 currentContentModel
= doctype
.getElementModel(parent
);
3025 private void readComment(boolean inDTD
)
3026 throws IOException
, XMLStreamException
3028 boolean saved
= expandPE
;
3031 readUntil(TEST_END_COMMENT
);
3035 doctype
.addComment(buf
.toString());
3039 * Parse a processing instruction.
3041 private void readPI(boolean inDTD
)
3042 throws IOException
, XMLStreamException
3044 boolean saved
= expandPE
;
3046 piTarget
= readNmtoken(true);
3047 if (piTarget
.indexOf(':') != -1)
3048 error("illegal character in PI target", new Character(':'));
3049 if ("xml".equalsIgnoreCase(piTarget
))
3050 error("illegal PI target", piTarget
);
3051 if (tryRead(TEST_END_PI
))
3055 if (!tryWhitespace())
3056 error("whitespace required between PI target and data");
3058 readUntil(TEST_END_PI
);
3059 piData
= buf
.toString();
3063 doctype
.addPI(piTarget
, piData
);
3067 * Parse an entity reference.
3069 private void readReference()
3070 throws IOException
, XMLStreamException
3073 String entityName
= readNmtoken(true);
3076 buf
.append(entityName
);
3080 * Read a CDATA section.
3082 private void readCDSect()
3083 throws IOException
, XMLStreamException
3086 readUntil(TEST_END_CDATA
);
3090 * Read character data.
3091 * @return the type of text read (CHARACTERS or SPACE)
3093 private int readCharData(String prefix
)
3094 throws IOException
, XMLStreamException
3096 boolean white
= true;
3100 boolean done
= false;
3101 boolean entities
= false;
3105 mark(tmpBuf
.length
);
3106 int len
= read(tmpBuf
, 0, tmpBuf
.length
);
3109 if (inputStack
.size() > 1)
3112 // report end-entity
3116 throw new EOFException();
3118 for (int i
= 0; i
< len
&& !done
; i
++)
3127 buf
.append(Character
.toChars(c
));
3128 break; // whitespace
3132 // character reference?
3136 if (c
== 0x23) // '#'
3140 boolean hex
= (c
== 0x78); // 'x'
3143 char[] ch
= readCharacterRef(hex ?
16 : 10);
3144 buf
.append(ch
, 0, ch
.length
);
3145 for (int j
= 0; j
< ch
.length
; j
++)
3153 break; // whitespace
3164 String entityName
= readNmtoken(true);
3167 (String
) PREDEFINED_ENTITIES
.get(entityName
);
3172 pushInput("", "&" + entityName
+ ";", false, false);
3177 // continue processing
3179 mark(tmpBuf
.length
);
3180 len
= read(tmpBuf
, 0, tmpBuf
.length
);
3183 if (inputStack
.size() > 1)
3189 throw new EOFException();
3192 break; // end of text sequence
3194 int l
= buf
.length();
3196 buf
.charAt(l
- 1) == ']' &&
3197 buf
.charAt(l
- 2) == ']')
3198 error("Character data may not contain unescaped ']]>'");
3199 buf
.append(Character
.toChars(c
));
3203 // read i characters
3204 int count
= 0, remaining
= i
;
3207 int r
= read(tmpBuf
, 0, remaining
);
3213 if (coalescing
&& tryRead(TEST_CDATA
))
3214 readUntil(TEST_END_CDATA
); // read CDATA section into buf
3216 done
= true; // end of text sequence
3221 if (!isXML11Char(c
) || isXML11RestrictedChar(c
))
3222 error("illegal XML 1.1 character",
3223 "U+" + Integer
.toHexString(c
));
3225 else if (!isChar(c
))
3226 error("illegal XML character",
3227 "U+" + Integer
.toHexString(c
));
3229 buf
.append(Character
.toChars(c
));
3232 // if text buffer >= 2MB, return it as a chunk
3233 // to avoid excessive memory use
3234 if (buf
.length() >= 2097152)
3239 return white ? XMLStreamConstants
.SPACE
: XMLStreamConstants
.CHARACTERS
;
3243 * Expands the specified entity.
3245 private void expandEntity(String name
, boolean inAttr
, boolean normalize
)
3246 throws IOException
, XMLStreamException
3248 if (doctype
!= null)
3250 Object value
= doctype
.getEntity(name
);
3253 if (xmlStandalone
== Boolean
.TRUE
)
3255 // VC: Standalone Document Declaration
3256 if (doctype
.isEntityExternal(name
))
3257 error("reference to external entity in standalone document");
3258 else if (value
instanceof ExternalIds
)
3260 ExternalIds ids
= (ExternalIds
) value
;
3261 if (ids
.notationName
!= null &&
3262 doctype
.isNotationExternal(ids
.notationName
))
3263 error("reference to external notation in " +
3264 "standalone document");
3267 if (value
instanceof String
)
3269 String text
= (String
) value
;
3270 if (inAttr
&& text
.indexOf('<') != -1)
3271 error("< in attribute value");
3272 pushInput(name
, text
, !inAttr
, normalize
);
3275 error("reference to external entity in attribute value", name
);
3277 pushInput(name
, (ExternalIds
) value
, !inAttr
, normalize
);
3281 error("reference to undeclared entity", name
);
3285 * Indicates whether the specified entity is unparsed.
3287 private boolean isUnparsedEntity(String name
)
3289 if (doctype
!= null)
3291 Object value
= doctype
.getEntity(name
);
3292 if (value
!= null && value
instanceof ExternalIds
)
3293 return ((ExternalIds
) value
).notationName
!= null;
3299 * Read an equals sign.
3301 private void readEq()
3302 throws IOException
, XMLStreamException
3310 * Character read for reading literals.
3311 * @param recognizePEs whether to recognize parameter-entity references
3313 private int literalReadCh(boolean recognizePEs
)
3314 throws IOException
, XMLStreamException
3316 int c
= recognizePEs ?
readCh() : read();
3319 if (inputStack
.size() > 1)
3321 inputStack
.removeLast();
3322 input
= (Input
) inputStack
.getLast();
3323 // Don't issue end-entity
3324 c
= recognizePEs ?
readCh() : read();
3327 throw new EOFException();
3333 * Read a string literal.
3335 private String
readLiteral(int flags
, boolean recognizePEs
)
3336 throws IOException
, XMLStreamException
3338 boolean saved
= expandPE
;
3339 int delim
= readCh();
3340 if (delim
!= 0x27 && delim
!= 0x22)
3341 error("expected '\"' or \"'\"", "U+" + Integer
.toHexString(delim
));
3342 literalBuf
.setLength(0);
3343 if ((flags
& LIT_DISABLE_PE
) != 0)
3345 boolean entities
= false;
3346 int inputStackSize
= inputStack
.size();
3349 int c
= literalReadCh(recognizePEs
);
3350 if (c
== delim
&& inputStackSize
== inputStack
.size())
3356 if ((flags
& (LIT_ATTRIBUTE
| LIT_PUBID
)) != 0)
3357 c
= 0x20; // normalize to space
3360 if ((flags
& LIT_ATTRIBUTE
) != 0)
3361 c
= 0x20; // normalize to space
3366 if (c
== 0x23) // '#'
3368 if ((flags
& LIT_DISABLE_CREF
) != 0)
3377 boolean hex
= (c
== 0x78); // 'x'
3380 char[] ref
= readCharacterRef(hex ?
16 : 10);
3381 for (int i
= 0; i
< ref
.length
; i
++)
3382 literalBuf
.append(ref
[i
]);
3389 if ((flags
& LIT_DISABLE_EREF
) != 0)
3397 String entityName
= readNmtoken(true);
3400 (String
) PREDEFINED_ENTITIES
.get(entityName
);
3402 literalBuf
.append(text
);
3404 expandEntity(entityName
,
3405 (flags
& LIT_ATTRIBUTE
) != 0,
3413 if ((flags
& LIT_ATTRIBUTE
) != 0)
3414 error("attribute values may not contain '<'");
3417 if (inputStack
.size() > 1)
3422 throw new EOFException();
3424 if ((c
< 0x0020 || c
> 0xfffd) ||
3425 (c
>= 0xd800 && c
< 0xdc00) ||
3426 (input
.xml11
&& (c
>= 0x007f) &&
3427 (c
<= 0x009f) && (c
!= 0x0085)))
3428 error("illegal character", "U+" + Integer
.toHexString(c
));
3430 literalBuf
.append(Character
.toChars(c
));
3435 normalizeCRLF(literalBuf
);
3436 if ((flags
& LIT_NORMALIZE
) > 0)
3437 literalBuf
= normalize(literalBuf
);
3438 return literalBuf
.toString();
3442 * Performs attribute-value normalization of the text buffer.
3443 * This discards leading and trailing whitespace, and replaces sequences
3444 * of whitespace with a single space.
3446 private StringBuffer
normalize(StringBuffer buf
)
3448 StringBuffer acc
= new StringBuffer();
3449 int len
= buf
.length();
3451 for (int i
= 0; i
< len
; i
++)
3453 char c
= buf
.charAt(i
);
3455 avState
= (avState
== 0) ?
0 : 1;
3468 * Replace any CR/LF pairs in the buffer with LF.
3469 * This may be necessary if combinations of CR or LF were declared as
3470 * (character) entity references in the input.
3472 private void normalizeCRLF(StringBuffer buf
)
3474 int len
= buf
.length() - 1;
3475 for (int i
= 0; i
< len
; i
++)
3477 char c
= buf
.charAt(i
);
3478 if (c
== '\r' && buf
.charAt(i
+ 1) == '\n')
3480 buf
.deleteCharAt(i
--);
3487 * Parse and expand a parameter entity reference.
3489 private void expandPEReference()
3490 throws IOException
, XMLStreamException
3492 String name
= readNmtoken(true, new StringBuffer());
3494 mark(1); // ensure we don't reset to before the semicolon
3495 if (doctype
!= null)
3497 String entityName
= "%" + name
;
3498 Object entity
= doctype
.getEntity(entityName
);
3501 if (xmlStandalone
== Boolean
.TRUE
)
3503 if (doctype
.isEntityExternal(entityName
))
3504 error("reference to external parameter entity in " +
3505 "standalone document");
3507 if (entity
instanceof String
)
3509 pushInput(name
, (String
) entity
, false, input
.normalize
);
3510 //pushInput(name, " " + (String) entity + " ");
3514 //pushInput("", " ");
3515 pushInput(name
, (ExternalIds
) entity
, false, input
.normalize
);
3516 //pushInput("", " ");
3520 error("reference to undeclared parameter entity", name
);
3523 error("reference to parameter entity without doctype", name
);
3527 * Parse the digits in a character reference.
3528 * @param base the base of the digits (10 or 16)
3530 private char[] readCharacterRef(int base
)
3531 throws IOException
, XMLStreamException
3533 CPStringBuilder b
= new CPStringBuilder();
3534 for (int c
= readCh(); c
!= 0x3b && c
!= -1; c
= readCh())
3535 b
.append(Character
.toChars(c
));
3538 int ord
= Integer
.parseInt(b
.toString(), base
);
3541 if (!isXML11Char(ord
))
3542 error("illegal XML 1.1 character reference " +
3543 "U+" + Integer
.toHexString(ord
));
3547 if ((ord
< 0x20 && !(ord
== 0x0a || ord
== 0x09 || ord
== 0x0d))
3548 || (ord
>= 0xd800 && ord
<= 0xdfff)
3549 || ord
== 0xfffe || ord
== 0xffff
3550 || ord
> 0x0010ffff)
3551 error("illegal XML character reference " +
3552 "U+" + Integer
.toHexString(ord
));
3554 return Character
.toChars(ord
);
3556 catch (NumberFormatException e
)
3558 error("illegal characters in character reference", b
.toString());
3564 * Parses an NMTOKEN or Name production.
3565 * @param isName if a Name, otherwise an NMTOKEN
3567 private String
readNmtoken(boolean isName
)
3568 throws IOException
, XMLStreamException
3570 return readNmtoken(isName
, nmtokenBuf
);
3574 * Parses an NMTOKEN or Name production using the specified buffer.
3575 * @param isName if a Name, otherwise an NMTOKEN
3576 * @param buf the character buffer to use
3578 private String
readNmtoken(boolean isName
, StringBuffer buf
)
3579 throws IOException
, XMLStreamException
3585 if (!isNameStartCharacter(c
, input
.xml11
))
3586 error("not a name start character",
3587 "U+" + Integer
.toHexString(c
));
3591 if (!isNameCharacter(c
, input
.xml11
))
3592 error("not a name character",
3593 "U+" + Integer
.toHexString(c
));
3595 buf
.append(Character
.toChars(c
));
3624 return intern(buf
.toString());
3626 if (!isNameCharacter(c
, input
.xml11
))
3627 error("not a name character",
3628 "U+" + Integer
.toHexString(c
));
3630 buf
.append(Character
.toChars(c
));
3637 * Indicates whether the specified Unicode character is an XML 1.1 Char.
3639 public static boolean isXML11Char(int c
)
3641 return ((c
>= 0x0001 && c
<= 0xD7FF) ||
3642 (c
>= 0xE000 && c
< 0xFFFE) ||
3643 (c
>= 0x10000 && c
<= 0x10FFFF));
3647 * Indicates whether the specified Unicode character is an XML 1.1
3650 public static boolean isXML11RestrictedChar(int c
)
3652 return ((c
>= 0x0001 && c
<= 0x0008) ||
3653 (c
>= 0x000B && c
<= 0x000C) ||
3654 (c
>= 0x000E && c
<= 0x001F) ||
3655 (c
>= 0x007F && c
<= 0x0084) ||
3656 (c
>= 0x0086 && c
<= 0x009F));
3660 * Indicates whether the specified text matches the Name or Nmtoken
3663 private boolean isNmtoken(String text
, boolean isName
)
3667 int[] cp
= UnicodeReader
.toCodePointArray(text
);
3672 if (!isNameStartCharacter(cp
[0], input
.xml11
))
3677 if (!isNameCharacter(cp
[0], input
.xml11
))
3680 for (int i
= 1; i
< cp
.length
; i
++)
3682 if (!isNameCharacter(cp
[i
], input
.xml11
))
3687 catch (IOException e
)
3694 * Indicates whether the specified Unicode character is a Name start
3697 public static boolean isNameStartCharacter(int c
, boolean xml11
)
3700 return ((c
>= 0x0041 && c
<= 0x005a) ||
3701 (c
>= 0x0061 && c
<= 0x007a) ||
3704 (c
>= 0xC0 && c
<= 0xD6) ||
3705 (c
>= 0xD8 && c
<= 0xF6) ||
3706 (c
>= 0xF8 && c
<= 0x2FF) ||
3707 (c
>= 0x370 && c
<= 0x37D) ||
3708 (c
>= 0x37F && c
<= 0x1FFF) ||
3709 (c
>= 0x200C && c
<= 0x200D) ||
3710 (c
>= 0x2070 && c
<= 0x218F) ||
3711 (c
>= 0x2C00 && c
<= 0x2FEF) ||
3712 (c
>= 0x3001 && c
<= 0xD7FF) ||
3713 (c
>= 0xF900 && c
<= 0xFDCF) ||
3714 (c
>= 0xFDF0 && c
<= 0xFFFD) ||
3715 (c
>= 0x10000 && c
<= 0xEFFFF));
3717 return (c
== 0x5f || c
== 0x3a || isLetter(c
));
3721 * Indicates whether the specified Unicode character is a Name non-initial
3724 public static boolean isNameCharacter(int c
, boolean xml11
)
3727 return ((c
>= 0x0041 && c
<= 0x005a) ||
3728 (c
>= 0x0061 && c
<= 0x007a) ||
3729 (c
>= 0x0030 && c
<= 0x0039) ||
3735 (c
>= 0xC0 && c
<= 0xD6) ||
3736 (c
>= 0xD8 && c
<= 0xF6) ||
3737 (c
>= 0xF8 && c
<= 0x2FF) ||
3738 (c
>= 0x300 && c
<= 0x37D) ||
3739 (c
>= 0x37F && c
<= 0x1FFF) ||
3740 (c
>= 0x200C && c
<= 0x200D) ||
3741 (c
>= 0x203F && c
<= 0x2040) ||
3742 (c
>= 0x2070 && c
<= 0x218F) ||
3743 (c
>= 0x2C00 && c
<= 0x2FEF) ||
3744 (c
>= 0x3001 && c
<= 0xD7FF) ||
3745 (c
>= 0xF900 && c
<= 0xFDCF) ||
3746 (c
>= 0xFDF0 && c
<= 0xFFFD) ||
3747 (c
>= 0x10000 && c
<= 0xEFFFF));
3749 return (c
== 0x2e || c
== 0x2d || c
== 0x5f || c
== 0x3a ||
3750 isLetter(c
) || isDigit(c
) ||
3751 isCombiningChar(c
) || isExtender(c
));
3755 * Indicates whether the specified Unicode character matches the Letter
3758 public static boolean isLetter(int c
)
3760 if ((c
>= 0x0041 && c
<= 0x005A) ||
3761 (c
>= 0x0061 && c
<= 0x007A) ||
3762 (c
>= 0x00C0 && c
<= 0x00D6) ||
3763 (c
>= 0x00D8 && c
<= 0x00F6) ||
3764 (c
>= 0x00F8 && c
<= 0x00FF) ||
3765 (c
>= 0x0100 && c
<= 0x0131) ||
3766 (c
>= 0x0134 && c
<= 0x013E) ||
3767 (c
>= 0x0141 && c
<= 0x0148) ||
3768 (c
>= 0x014A && c
<= 0x017E) ||
3769 (c
>= 0x0180 && c
<= 0x01C3) ||
3770 (c
>= 0x01CD && c
<= 0x01F0) ||
3771 (c
>= 0x01F4 && c
<= 0x01F5) ||
3772 (c
>= 0x01FA && c
<= 0x0217) ||
3773 (c
>= 0x0250 && c
<= 0x02A8) ||
3774 (c
>= 0x02BB && c
<= 0x02C1) ||
3776 (c
>= 0x0388 && c
<= 0x038A) ||
3778 (c
>= 0x038E && c
<= 0x03A1) ||
3779 (c
>= 0x03A3 && c
<= 0x03CE) ||
3780 (c
>= 0x03D0 && c
<= 0x03D6) ||
3785 (c
>= 0x03E2 && c
<= 0x03F3) ||
3786 (c
>= 0x0401 && c
<= 0x040C) ||
3787 (c
>= 0x040E && c
<= 0x044F) ||
3788 (c
>= 0x0451 && c
<= 0x045C) ||
3789 (c
>= 0x045E && c
<= 0x0481) ||
3790 (c
>= 0x0490 && c
<= 0x04C4) ||
3791 (c
>= 0x04C7 && c
<= 0x04C8) ||
3792 (c
>= 0x04CB && c
<= 0x04CC) ||
3793 (c
>= 0x04D0 && c
<= 0x04EB) ||
3794 (c
>= 0x04EE && c
<= 0x04F5) ||
3795 (c
>= 0x04F8 && c
<= 0x04F9) ||
3796 (c
>= 0x0531 && c
<= 0x0556) ||
3798 (c
>= 0x0561 && c
<= 0x0586) ||
3799 (c
>= 0x05D0 && c
<= 0x05EA) ||
3800 (c
>= 0x05F0 && c
<= 0x05F2) ||
3801 (c
>= 0x0621 && c
<= 0x063A) ||
3802 (c
>= 0x0641 && c
<= 0x064A) ||
3803 (c
>= 0x0671 && c
<= 0x06B7) ||
3804 (c
>= 0x06BA && c
<= 0x06BE) ||
3805 (c
>= 0x06C0 && c
<= 0x06CE) ||
3806 (c
>= 0x06D0 && c
<= 0x06D3) ||
3808 (c
>= 0x06E5 && c
<= 0x06E6) ||
3809 (c
>= 0x0905 && c
<= 0x0939) ||
3811 (c
>= 0x0958 && c
<= 0x0961) ||
3812 (c
>= 0x0985 && c
<= 0x098C) ||
3813 (c
>= 0x098F && c
<= 0x0990) ||
3814 (c
>= 0x0993 && c
<= 0x09A8) ||
3815 (c
>= 0x09AA && c
<= 0x09B0) ||
3817 (c
>= 0x09B6 && c
<= 0x09B9) ||
3818 (c
>= 0x09DC && c
<= 0x09DD) ||
3819 (c
>= 0x09DF && c
<= 0x09E1) ||
3820 (c
>= 0x09F0 && c
<= 0x09F1) ||
3821 (c
>= 0x0A05 && c
<= 0x0A0A) ||
3822 (c
>= 0x0A0F && c
<= 0x0A10) ||
3823 (c
>= 0x0A13 && c
<= 0x0A28) ||
3824 (c
>= 0x0A2A && c
<= 0x0A30) ||
3825 (c
>= 0x0A32 && c
<= 0x0A33) ||
3826 (c
>= 0x0A35 && c
<= 0x0A36) ||
3827 (c
>= 0x0A38 && c
<= 0x0A39) ||
3828 (c
>= 0x0A59 && c
<= 0x0A5C) ||
3830 (c
>= 0x0A72 && c
<= 0x0A74) ||
3831 (c
>= 0x0A85 && c
<= 0x0A8B) ||
3833 (c
>= 0x0A8F && c
<= 0x0A91) ||
3834 (c
>= 0x0A93 && c
<= 0x0AA8) ||
3835 (c
>= 0x0AAA && c
<= 0x0AB0) ||
3836 (c
>= 0x0AB2 && c
<= 0x0AB3) ||
3837 (c
>= 0x0AB5 && c
<= 0x0AB9) ||
3840 (c
>= 0x0B05 && c
<= 0x0B0C) ||
3841 (c
>= 0x0B0F && c
<= 0x0B10) ||
3842 (c
>= 0x0B13 && c
<= 0x0B28) ||
3843 (c
>= 0x0B2A && c
<= 0x0B30) ||
3844 (c
>= 0x0B32 && c
<= 0x0B33) ||
3845 (c
>= 0x0B36 && c
<= 0x0B39) ||
3847 (c
>= 0x0B5C && c
<= 0x0B5D) ||
3848 (c
>= 0x0B5F && c
<= 0x0B61) ||
3849 (c
>= 0x0B85 && c
<= 0x0B8A) ||
3850 (c
>= 0x0B8E && c
<= 0x0B90) ||
3851 (c
>= 0x0B92 && c
<= 0x0B95) ||
3852 (c
>= 0x0B99 && c
<= 0x0B9A) ||
3854 (c
>= 0x0B9E && c
<= 0x0B9F) ||
3855 (c
>= 0x0BA3 && c
<= 0x0BA4) ||
3856 (c
>= 0x0BA8 && c
<= 0x0BAA) ||
3857 (c
>= 0x0BAE && c
<= 0x0BB5) ||
3858 (c
>= 0x0BB7 && c
<= 0x0BB9) ||
3859 (c
>= 0x0C05 && c
<= 0x0C0C) ||
3860 (c
>= 0x0C0E && c
<= 0x0C10) ||
3861 (c
>= 0x0C12 && c
<= 0x0C28) ||
3862 (c
>= 0x0C2A && c
<= 0x0C33) ||
3863 (c
>= 0x0C35 && c
<= 0x0C39) ||
3864 (c
>= 0x0C60 && c
<= 0x0C61) ||
3865 (c
>= 0x0C85 && c
<= 0x0C8C) ||
3866 (c
>= 0x0C8E && c
<= 0x0C90) ||
3867 (c
>= 0x0C92 && c
<= 0x0CA8) ||
3868 (c
>= 0x0CAA && c
<= 0x0CB3) ||
3869 (c
>= 0x0CB5 && c
<= 0x0CB9) ||
3871 (c
>= 0x0CE0 && c
<= 0x0CE1) ||
3872 (c
>= 0x0D05 && c
<= 0x0D0C) ||
3873 (c
>= 0x0D0E && c
<= 0x0D10) ||
3874 (c
>= 0x0D12 && c
<= 0x0D28) ||
3875 (c
>= 0x0D2A && c
<= 0x0D39) ||
3876 (c
>= 0x0D60 && c
<= 0x0D61) ||
3877 (c
>= 0x0E01 && c
<= 0x0E2E) ||
3879 (c
>= 0x0E32 && c
<= 0x0E33) ||
3880 (c
>= 0x0E40 && c
<= 0x0E45) ||
3881 (c
>= 0x0E81 && c
<= 0x0E82) ||
3883 (c
>= 0x0E87 && c
<= 0x0E88) ||
3886 (c
>= 0x0E94 && c
<= 0x0E97) ||
3887 (c
>= 0x0E99 && c
<= 0x0E9F) ||
3888 (c
>= 0x0EA1 && c
<= 0x0EA3) ||
3891 (c
>= 0x0EAA && c
<= 0x0EAB) ||
3892 (c
>= 0x0EAD && c
<= 0x0EAE) ||
3894 (c
>= 0x0EB2 && c
<= 0x0EB3) ||
3896 (c
>= 0x0EC0 && c
<= 0x0EC4) ||
3897 (c
>= 0x0F40 && c
<= 0x0F47) ||
3898 (c
>= 0x0F49 && c
<= 0x0F69) ||
3899 (c
>= 0x10A0 && c
<= 0x10C5) ||
3900 (c
>= 0x10D0 && c
<= 0x10F6) ||
3902 (c
>= 0x1102 && c
<= 0x1103) ||
3903 (c
>= 0x1105 && c
<= 0x1107) ||
3905 (c
>= 0x110B && c
<= 0x110C) ||
3906 (c
>= 0x110E && c
<= 0x1112) ||
3913 (c
>= 0x1154 && c
<= 0x1155) ||
3915 (c
>= 0x115F && c
<= 0x1161) ||
3920 (c
>= 0x116D && c
<= 0x116E) ||
3921 (c
>= 0x1172 && c
<= 0x1173) ||
3926 (c
>= 0x11AE && c
<= 0x11AF) ||
3927 (c
>= 0x11B7 && c
<= 0x11B8) ||
3929 (c
>= 0x11BC && c
<= 0x11C2) ||
3933 (c
>= 0x1E00 && c
<= 0x1E9B) ||
3934 (c
>= 0x1EA0 && c
<= 0x1EF9) ||
3935 (c
>= 0x1F00 && c
<= 0x1F15) ||
3936 (c
>= 0x1F18 && c
<= 0x1F1D) ||
3937 (c
>= 0x1F20 && c
<= 0x1F45) ||
3938 (c
>= 0x1F48 && c
<= 0x1F4D) ||
3939 (c
>= 0x1F50 && c
<= 0x1F57) ||
3943 (c
>= 0x1F5F && c
<= 0x1F7D) ||
3944 (c
>= 0x1F80 && c
<= 0x1FB4) ||
3945 (c
>= 0x1FB6 && c
<= 0x1FBC) ||
3947 (c
>= 0x1FC2 && c
<= 0x1FC4) ||
3948 (c
>= 0x1FC6 && c
<= 0x1FCC) ||
3949 (c
>= 0x1FD0 && c
<= 0x1FD3) ||
3950 (c
>= 0x1FD6 && c
<= 0x1FDB) ||
3951 (c
>= 0x1FE0 && c
<= 0x1FEC) ||
3952 (c
>= 0x1FF2 && c
<= 0x1FF4) ||
3953 (c
>= 0x1FF6 && c
<= 0x1FFC) ||
3955 (c
>= 0x212A && c
<= 0x212B) ||
3957 (c
>= 0x2180 && c
<= 0x2182) ||
3958 (c
>= 0x3041 && c
<= 0x3094) ||
3959 (c
>= 0x30A1 && c
<= 0x30FA) ||
3960 (c
>= 0x3105 && c
<= 0x312C) ||
3961 (c
>= 0xAC00 && c
<= 0xD7A3))
3962 return true; // BaseChar
3963 if ((c
>= 0x4e00 && c
<= 0x9fa5) ||
3965 (c
>= 0x3021 && c
<= 0x3029))
3966 return true; // Ideographic
3971 * Indicates whether the specified Unicode character matches the Digit
3974 public static boolean isDigit(int c
)
3976 return ((c
>= 0x0030 && c
<= 0x0039) ||
3977 (c
>= 0x0660 && c
<= 0x0669) ||
3978 (c
>= 0x06F0 && c
<= 0x06F9) ||
3979 (c
>= 0x0966 && c
<= 0x096F) ||
3980 (c
>= 0x09E6 && c
<= 0x09EF) ||
3981 (c
>= 0x0A66 && c
<= 0x0A6F) ||
3982 (c
>= 0x0AE6 && c
<= 0x0AEF) ||
3983 (c
>= 0x0B66 && c
<= 0x0B6F) ||
3984 (c
>= 0x0BE7 && c
<= 0x0BEF) ||
3985 (c
>= 0x0C66 && c
<= 0x0C6F) ||
3986 (c
>= 0x0CE6 && c
<= 0x0CEF) ||
3987 (c
>= 0x0D66 && c
<= 0x0D6F) ||
3988 (c
>= 0x0E50 && c
<= 0x0E59) ||
3989 (c
>= 0x0ED0 && c
<= 0x0ED9) ||
3990 (c
>= 0x0F20 && c
<= 0x0F29));
3994 * Indicates whether the specified Unicode character matches the
3995 * CombiningChar production.
3997 public static boolean isCombiningChar(int c
)
3999 return ((c
>= 0x0300 && c
<= 0x0345) ||
4000 (c
>= 0x0360 && c
<= 0x0361) ||
4001 (c
>= 0x0483 && c
<= 0x0486) ||
4002 (c
>= 0x0591 && c
<= 0x05A1) ||
4003 (c
>= 0x05A3 && c
<= 0x05B9) ||
4004 (c
>= 0x05BB && c
<= 0x05BD) ||
4006 (c
>= 0x05C1 && c
<= 0x05C2) ||
4008 (c
>= 0x064B && c
<= 0x0652) ||
4010 (c
>= 0x06D6 && c
<= 0x06DC) ||
4011 (c
>= 0x06DD && c
<= 0x06DF) ||
4012 (c
>= 0x06E0 && c
<= 0x06E4) ||
4013 (c
>= 0x06E7 && c
<= 0x06E8) ||
4014 (c
>= 0x06EA && c
<= 0x06ED) ||
4015 (c
>= 0x0901 && c
<= 0x0903) ||
4017 (c
>= 0x093E && c
<= 0x094C) ||
4019 (c
>= 0x0951 && c
<= 0x0954) ||
4020 (c
>= 0x0962 && c
<= 0x0963) ||
4021 (c
>= 0x0981 && c
<= 0x0983) ||
4025 (c
>= 0x09C0 && c
<= 0x09C4) ||
4026 (c
>= 0x09C7 && c
<= 0x09C8) ||
4027 (c
>= 0x09CB && c
<= 0x09CD) ||
4029 (c
>= 0x09E2 && c
<= 0x09E3) ||
4034 (c
>= 0x0A40 && c
<= 0x0A42) ||
4035 (c
>= 0x0A47 && c
<= 0x0A48) ||
4036 (c
>= 0x0A4B && c
<= 0x0A4D) ||
4037 (c
>= 0x0A70 && c
<= 0x0A71) ||
4038 (c
>= 0x0A81 && c
<= 0x0A83) ||
4040 (c
>= 0x0ABE && c
<= 0x0AC5) ||
4041 (c
>= 0x0AC7 && c
<= 0x0AC9) ||
4042 (c
>= 0x0ACB && c
<= 0x0ACD) ||
4043 (c
>= 0x0B01 && c
<= 0x0B03) ||
4045 (c
>= 0x0B3E && c
<= 0x0B43) ||
4046 (c
>= 0x0B47 && c
<= 0x0B48) ||
4047 (c
>= 0x0B4B && c
<= 0x0B4D) ||
4048 (c
>= 0x0B56 && c
<= 0x0B57) ||
4049 (c
>= 0x0B82 && c
<= 0x0B83) ||
4050 (c
>= 0x0BBE && c
<= 0x0BC2) ||
4051 (c
>= 0x0BC6 && c
<= 0x0BC8) ||
4052 (c
>= 0x0BCA && c
<= 0x0BCD) ||
4054 (c
>= 0x0C01 && c
<= 0x0C03) ||
4055 (c
>= 0x0C3E && c
<= 0x0C44) ||
4056 (c
>= 0x0C46 && c
<= 0x0C48) ||
4057 (c
>= 0x0C4A && c
<= 0x0C4D) ||
4058 (c
>= 0x0C55 && c
<= 0x0C56) ||
4059 (c
>= 0x0C82 && c
<= 0x0C83) ||
4060 (c
>= 0x0CBE && c
<= 0x0CC4) ||
4061 (c
>= 0x0CC6 && c
<= 0x0CC8) ||
4062 (c
>= 0x0CCA && c
<= 0x0CCD) ||
4063 (c
>= 0x0CD5 && c
<= 0x0CD6) ||
4064 (c
>= 0x0D02 && c
<= 0x0D03) ||
4065 (c
>= 0x0D3E && c
<= 0x0D43) ||
4066 (c
>= 0x0D46 && c
<= 0x0D48) ||
4067 (c
>= 0x0D4A && c
<= 0x0D4D) ||
4070 (c
>= 0x0E34 && c
<= 0x0E3A) ||
4071 (c
>= 0x0E47 && c
<= 0x0E4E) ||
4073 (c
>= 0x0EB4 && c
<= 0x0EB9) ||
4074 (c
>= 0x0EBB && c
<= 0x0EBC) ||
4075 (c
>= 0x0EC8 && c
<= 0x0ECD) ||
4076 (c
>= 0x0F18 && c
<= 0x0F19) ||
4082 (c
>= 0x0F71 && c
<= 0x0F84) ||
4083 (c
>= 0x0F86 && c
<= 0x0F8B) ||
4084 (c
>= 0x0F90 && c
<= 0x0F95) ||
4086 (c
>= 0x0F99 && c
<= 0x0FAD) ||
4087 (c
>= 0x0FB1 && c
<= 0x0FB7) ||
4089 (c
>= 0x20D0 && c
<= 0x20DC) ||
4091 (c
>= 0x302A && c
<= 0x302F) ||
4097 * Indicates whether the specified Unicode character matches the Extender
4100 public static boolean isExtender(int c
)
4102 return (c
== 0x00B7 ||
4110 (c
>= 0x3031 && c
<= 0x3035) ||
4111 (c
>= 0x309D && c
<= 0x309E) ||
4112 (c
>= 0x30FC && c
<= 0x30FE));
4116 * Indicates whether the specified Unicode character matches the Char
4119 public static boolean isChar(int c
)
4121 return (c
>= 0x20 && c
< 0xd800) ||
4122 (c
>= 0xe00 && c
< 0xfffe) ||
4123 (c
>= 0x10000 && c
< 0x110000) ||
4124 c
== 0xa || c
== 0x9 || c
== 0xd;
4128 * Interns the specified text or not, depending on the value of
4131 private String
intern(String text
)
4133 return stringInterning ? text
.intern() : text
;
4137 * Report a parsing error.
4139 private void error(String message
)
4140 throws XMLStreamException
4142 error(message
, null);
4146 * Report a parsing error.
4148 private void error(String message
, Object info
)
4149 throws XMLStreamException
4153 if (info
instanceof String
)
4154 message
+= ": \"" + ((String
) info
) + "\"";
4155 else if (info
instanceof Character
)
4156 message
+= ": '" + ((Character
) info
) + "'";
4158 throw new XMLStreamException(message
);
4162 * Perform validation of a start-element event.
4164 private void validateStartElement(String elementName
)
4165 throws XMLStreamException
4167 if (currentContentModel
== null)
4170 // VC: Root Element Type
4171 if (!elementName
.equals(doctype
.rootName
))
4172 error("root element name must match name in DTD");
4175 // VC: Element Valid
4176 switch (currentContentModel
.type
)
4178 case ContentModel
.EMPTY
:
4179 error("child element found in empty element", elementName
);
4181 case ContentModel
.ELEMENT
:
4182 LinkedList ctx
= (LinkedList
) validationStack
.getLast();
4183 ctx
.add(elementName
);
4185 case ContentModel
.MIXED
:
4186 MixedContentModel mm
= (MixedContentModel
) currentContentModel
;
4187 if (!mm
.containsName(elementName
))
4188 error("illegal element for content model", elementName
);
4194 * Perform validation of an end-element event.
4196 private void validateEndElement()
4197 throws XMLStreamException
4199 if (currentContentModel
== null)
4203 if (!idrefs
.containsAll(ids
))
4204 error("IDREF values must match the value of some ID attribute");
4207 // VC: Element Valid
4208 switch (currentContentModel
.type
)
4210 case ContentModel
.ELEMENT
:
4211 LinkedList ctx
= (LinkedList
) validationStack
.getLast();
4212 ElementContentModel ecm
= (ElementContentModel
) currentContentModel
;
4213 validateElementContent(ecm
, ctx
);
4219 * Perform validation of character data.
4221 private void validatePCData(String text
)
4222 throws XMLStreamException
4224 // VC: Element Valid
4225 switch (currentContentModel
.type
)
4227 case ContentModel
.EMPTY
:
4228 error("character data found in empty element", text
);
4230 case ContentModel
.ELEMENT
:
4231 boolean white
= true;
4232 int len
= text
.length();
4233 for (int i
= 0; i
< len
; i
++)
4235 char c
= text
.charAt(i
);
4236 if (c
!= ' ' && c
!= '\t' && c
!= '\n' && c
!= '\r')
4243 error("character data found in element with element content", text
);
4244 else if (xmlStandalone
== Boolean
.TRUE
&& currentContentModel
.external
)
4245 // VC: Standalone Document Declaration
4246 error("whitespace in element content of externally declared " +
4247 "element in standalone document");
4253 * Validates the specified validation context (list of child elements)
4254 * against the element content model for the current element.
4256 private void validateElementContent(ElementContentModel model
,
4257 LinkedList children
)
4258 throws XMLStreamException
4260 // Use regular expression
4261 CPStringBuilder buf
= new CPStringBuilder();
4262 for (Iterator i
= children
.iterator(); i
.hasNext(); )
4264 buf
.append((String
) i
.next());
4267 String c
= buf
.toString();
4268 String regex
= createRegularExpression(model
);
4269 if (!c
.matches(regex
))
4270 error("element content "+model
.text
+" does not match expression "+regex
, c
);
4274 * Creates the regular expression used to validate an element content
4277 private String
createRegularExpression(ElementContentModel model
)
4279 if (model
.regex
== null)
4281 CPStringBuilder buf
= new CPStringBuilder();
4283 for (Iterator i
= model
.contentParticles
.iterator(); i
.hasNext(); )
4285 ContentParticle cp
= (ContentParticle
) i
.next();
4286 if (cp
.content
instanceof String
)
4289 buf
.append((String
) cp
.content
);
4299 else if (cp
.min
== 0)
4304 ElementContentModel ecm
= (ElementContentModel
) cp
.content
;
4305 buf
.append(createRegularExpression(ecm
));
4307 if (model
.or
&& i
.hasNext())
4311 if (model
.max
== -1)
4318 else if (model
.min
== 0)
4320 model
.regex
= buf
.toString();
4326 * Performs validation of a document type declaration event.
4328 void validateDoctype()
4329 throws XMLStreamException
4331 for (Iterator i
= doctype
.entityIterator(); i
.hasNext(); )
4333 Map
.Entry entry
= (Map
.Entry
) i
.next();
4334 Object entity
= entry
.getValue();
4335 if (entity
instanceof ExternalIds
)
4337 ExternalIds ids
= (ExternalIds
) entity
;
4338 if (ids
.notationName
!= null)
4340 // VC: Notation Declared
4341 ExternalIds notation
= doctype
.getNotation(ids
.notationName
);
4342 if (notation
== null)
4343 error("Notation name must match the declared name of a " +
4344 "notation", ids
.notationName
);
4351 * Simple test harness for reading an XML file.
4352 * args[0] is the filename of the XML file
4353 * If args[1] is "-x", enable XInclude processing
4355 public static void main(String
[] args
)
4358 boolean validating
= false;
4359 boolean namespaceAware
= false;
4360 boolean xIncludeAware
= false;
4362 while (pos
< args
.length
&& args
[pos
].startsWith("-"))
4364 if ("-x".equals(args
[pos
]))
4365 xIncludeAware
= true;
4366 else if ("-v".equals(args
[pos
]))
4368 else if ("-n".equals(args
[pos
]))
4369 namespaceAware
= true;
4372 if (pos
>= args
.length
)
4374 System
.out
.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4375 System
.out
.println("\t-n: use namespace aware mode");
4376 System
.out
.println("\t-v: use validating parser");
4377 System
.out
.println("\t-x: use XInclude aware mode");
4380 while (pos
< args
.length
)
4382 XMLParser p
= new XMLParser(new java
.io
.FileInputStream(args
[pos
]),
4383 absolutize(null, args
[pos
]),
4384 validating
, // validating
4385 namespaceAware
, // namespaceAware
4386 true, // coalescing,
4387 true, // replaceERefs
4388 true, // externalEntities
4391 true, // stringInterning
4392 true, // extendedEventTypes
4395 XMLStreamReader reader
= p
;
4397 reader
= new XIncludeFilter(p
, args
[pos
], true, true, true);
4402 while (reader
.hasNext())
4404 event
= reader
.next();
4405 Location loc
= reader
.getLocation();
4406 System
.out
.print(loc
.getLineNumber() + ":" +
4407 loc
.getColumnNumber() + " ");
4410 case XMLStreamConstants
.START_DOCUMENT
:
4411 System
.out
.println("START_DOCUMENT version=" +
4412 reader
.getVersion() +
4414 reader
.getEncoding());
4416 case XMLStreamConstants
.END_DOCUMENT
:
4417 System
.out
.println("END_DOCUMENT");
4419 case XMLStreamConstants
.START_ELEMENT
:
4420 System
.out
.println("START_ELEMENT " +
4422 int l
= reader
.getNamespaceCount();
4423 for (int i
= 0; i
< l
; i
++)
4424 System
.out
.println("\tnamespace " +
4425 reader
.getNamespacePrefix(i
) + "='" +
4426 reader
.getNamespaceURI(i
)+"'");
4427 l
= reader
.getAttributeCount();
4428 for (int i
= 0; i
< l
; i
++)
4429 System
.out
.println("\tattribute " +
4430 reader
.getAttributeName(i
) + "='" +
4431 reader
.getAttributeValue(i
) + "'");
4433 case XMLStreamConstants
.END_ELEMENT
:
4434 System
.out
.println("END_ELEMENT " + reader
.getName());
4436 case XMLStreamConstants
.CHARACTERS
:
4437 System
.out
.println("CHARACTERS '" +
4438 encodeText(reader
.getText()) + "'");
4440 case XMLStreamConstants
.CDATA
:
4441 System
.out
.println("CDATA '" +
4442 encodeText(reader
.getText()) + "'");
4444 case XMLStreamConstants
.SPACE
:
4445 System
.out
.println("SPACE '" +
4446 encodeText(reader
.getText()) + "'");
4448 case XMLStreamConstants
.DTD
:
4449 System
.out
.println("DTD " + reader
.getText());
4451 case XMLStreamConstants
.ENTITY_REFERENCE
:
4452 System
.out
.println("ENTITY_REFERENCE " + reader
.getText());
4454 case XMLStreamConstants
.COMMENT
:
4455 System
.out
.println("COMMENT '" +
4456 encodeText(reader
.getText()) + "'");
4458 case XMLStreamConstants
.PROCESSING_INSTRUCTION
:
4459 System
.out
.println("PROCESSING_INSTRUCTION " +
4460 reader
.getPITarget() + " " +
4461 reader
.getPIData());
4464 System
.out
.println("START_ENTITY " + reader
.getText());
4467 System
.out
.println("END_ENTITY " + reader
.getText());
4470 System
.out
.println("Unknown event: " + event
);
4474 catch (XMLStreamException e
)
4476 Location l
= reader
.getLocation();
4477 System
.out
.println("At line "+l
.getLineNumber()+
4478 ", column "+l
.getColumnNumber()+
4479 " of "+l
.getSystemId());
4487 * Escapes control characters in the specified text. For debugging.
4489 private static String
encodeText(String text
)
4491 CPStringBuilder b
= new CPStringBuilder();
4492 int len
= text
.length();
4493 for (int i
= 0; i
< len
; i
++)
4495 char c
= text
.charAt(i
);
4511 return b
.toString();
4515 * An attribute instance.
4526 * Attribute type as declared in the DTD, or CDATA otherwise.
4531 * Whether the attribute was specified or defaulted.
4533 final boolean specified
;
4536 * The attribute value.
4541 * The namespace prefix.
4543 final String prefix
;
4546 * The namespace local-name.
4548 final String localName
;
4550 Attribute(String name
, String type
, boolean specified
, String value
)
4554 this.specified
= specified
;
4556 int ci
= name
.indexOf(':');
4560 localName
= intern(name
);
4564 prefix
= intern(name
.substring(0, ci
));
4565 localName
= intern(name
.substring(ci
+ 1));
4569 public boolean equals(Object other
)
4571 if (other
instanceof Attribute
)
4573 Attribute a
= (Attribute
) other
;
4576 if (!a
.localName
.equals(localName
))
4578 String auri
= getNamespaceURI(a
.prefix
);
4579 String uri
= getNamespaceURI(prefix
);
4580 if (uri
== null && (auri
== null ||
4581 (input
.xml11
&& "".equals(auri
))))
4585 if ("".equals(uri
) && input
.xml11
&& "".equals(auri
))
4587 return uri
.equals(auri
);
4592 return a
.name
.equals(name
);
4597 public String
toString()
4599 CPStringBuilder buf
= new CPStringBuilder(getClass().getName());
4601 buf
.append("name=");
4605 buf
.append(",value=");
4610 buf
.append(",type=");
4614 buf
.append(",specified");
4616 return buf
.toString();
4622 * Representation of a DTD.
4628 * Name of the root element.
4630 final String rootName
;
4633 * Public ID, if any, of external subset.
4635 final String publicId
;
4638 * System ID (URL), if any, of external subset.
4640 final String systemId
;
4643 * Map of element names to content models.
4645 private final LinkedHashMap elements
= new LinkedHashMap();
4648 * Map of element names to maps of attribute declarations.
4650 private final LinkedHashMap attlists
= new LinkedHashMap();
4653 * Map of entity names to entities (String or ExternalIds).
4655 private final LinkedHashMap entities
= new LinkedHashMap();
4658 * Map of notation names to ExternalIds.
4660 private final LinkedHashMap notations
= new LinkedHashMap();
4663 * Map of anonymous keys to comments.
4665 private final LinkedHashMap comments
= new LinkedHashMap();
4668 * Map of anonymous keys to processing instructions (String[2]
4669 * containing {target, data}).
4671 private final LinkedHashMap pis
= new LinkedHashMap();
4674 * List of keys to all markup entries in the DTD.
4676 private final LinkedList entries
= new LinkedList();
4679 * Set of the entities defined in the external subset.
4681 private final HashSet externalEntities
= new HashSet();
4684 * Set of the notations defined in the external subset.
4686 private final HashSet externalNotations
= new HashSet();
4689 * Counter for making anonymous keys.
4691 private int anon
= 1;
/**
 * Creates a DTD representation.
 * @param rootName the name of the root element
 * @param publicId the public ID, if any, of the external subset
 * @param systemId the system ID (URL), if any, of the external subset
 */
Doctype(String rootName, String publicId, String systemId)
{
  // The three assignments are independent of one another.
  this.systemId = systemId;
  this.publicId = publicId;
  this.rootName = rootName;
}
4704 * Adds an element declaration.
4705 * @param name the element name
4706 * @param text the content model text
4707 * @param model the parsed content model
4709 void addElementDecl(String name
, String text
, ContentModel model
)
4711 if (elements
.containsKey(name
))
4714 model
.external
= (inputStack
.size() != 1);
4715 elements
.put(name
, model
);
4716 entries
.add("E" + name
);
4720 * Adds an attribute declaration.
4721 * @param ename the element name
4722 * @param aname the attribute name
4723 * @param decl the attribute declaration details
4725 void addAttributeDecl(String ename
, String aname
, AttributeDecl decl
)
4727 LinkedHashMap attlist
= (LinkedHashMap
) attlists
.get(ename
);
4728 if (attlist
== null)
4730 attlist
= new LinkedHashMap();
4731 attlists
.put(ename
, attlist
);
4733 else if (attlist
.containsKey(aname
))
4735 attlist
.put(aname
, decl
);
4736 String key
= "A" + ename
;
4737 if (!entries
.contains(key
))
4742 * Adds an entity declaration.
4743 * @param name the entity name
4744 * @param text the entity replacement text
4745 * @param inExternalSubset if we are in the external subset
4747 void addEntityDecl(String name
, String text
, boolean inExternalSubset
)
4749 if (entities
.containsKey(name
))
4751 entities
.put(name
, text
);
4752 entries
.add("e" + name
);
4753 if (inExternalSubset
)
4754 externalEntities
.add(name
);
4758 * Adds an entity declaration.
4759 * @param name the entity name
4760 * @param ids the external IDs
4761 * @param inExternalSubset if we are in the external subset
4763 void addEntityDecl(String name
, ExternalIds ids
, boolean inExternalSubset
)
4765 if (entities
.containsKey(name
))
4767 entities
.put(name
, ids
);
4768 entries
.add("e" + name
);
4769 if (inExternalSubset
)
4770 externalEntities
.add(name
);
4774 * Adds a notation declaration.
4775 * @param name the notation name
4776 * @param ids the external IDs
4777 * @param inExternalSubset if we are in the external subset
4779 void addNotationDecl(String name
, ExternalIds ids
, boolean inExternalSubset
)
4781 if (notations
.containsKey(name
))
4783 notations
.put(name
, ids
);
4784 entries
.add("n" + name
);
4785 if (inExternalSubset
)
4786 externalNotations
.add(name
);
4792 void addComment(String text
)
4794 String key
= Integer
.toString(anon
++);
4795 comments
.put(key
, text
);
4796 entries
.add("c" + key
);
4800 * Adds a processing instruction.
4802 void addPI(String target
, String data
)
4804 String key
= Integer
.toString(anon
++);
4805 pis
.put(key
, new String
[] {target
, data
});
4806 entries
.add("p" + key
);
4810 * Returns the content model for the specified element.
4811 * @param name the element name
4813 ContentModel
getElementModel(String name
)
4815 return (ContentModel
) elements
.get(name
);
4819 * Returns the attribute definition for the given attribute
4820 * @param ename the element name
4821 * @param aname the attribute name
4823 AttributeDecl
getAttributeDecl(String ename
, String aname
)
4825 LinkedHashMap attlist
= (LinkedHashMap
) attlists
.get(ename
);
4826 return (attlist
== null) ?
null : (AttributeDecl
) attlist
.get(aname
);
4830 * Indicates whether the specified attribute was declared in the DTD.
4831 * @param ename the element name
4832 * @param aname the attribute name
4834 boolean isAttributeDeclared(String ename
, String aname
)
4836 LinkedHashMap attlist
= (LinkedHashMap
) attlists
.get(ename
);
4837 return (attlist
== null) ?
false : attlist
.containsKey(aname
);
4841 * Returns an iterator over the entries in the attribute list for the
4843 * @param ename the element name
4845 Iterator
attlistIterator(String ename
)
4847 LinkedHashMap attlist
= (LinkedHashMap
) attlists
.get(ename
);
4848 return (attlist
== null) ? Collections
.EMPTY_LIST
.iterator() :
4849 attlist
.entrySet().iterator();
4853 * Returns the entity (String or ExternalIds) for the given entity name.
4855 Object
getEntity(String name
)
4857 return entities
.get(name
);
4861 * Indicates whether the specified entity was declared in the external
4864 boolean isEntityExternal(String name
)
4866 return externalEntities
.contains(name
);
4870 * Returns an iterator over the entity map entries.
4872 Iterator
entityIterator()
4874 return entities
.entrySet().iterator();
4878 * Returns the notation IDs for the given notation name.
4880 ExternalIds
getNotation(String name
)
4882 return (ExternalIds
) notations
.get(name
);
4886 * Indicates whether the specified notation was declared in the external
4889 boolean isNotationExternal(String name
)
4891 return externalNotations
.contains(name
);
4895 * Returns the comment associated with the specified (anonymous) key.
4897 String
getComment(String key
)
4899 return (String
) comments
.get(key
);
4903 * Returns the processing instruction associated with the specified
4906 String
[] getPI(String key
)
4908 return (String
[]) pis
.get(key
);
4912 * Returns an iterator over the keys of the markup entries in this DTD,
4913 * in the order declared.
4915 Iterator
entryIterator()
4917 return entries
.iterator();
4923 * Combination of an ExternalID and an optional NDataDecl.
4939 * The notation name declared with the NDATA keyword.
4941 String notationName
;
4947 abstract class ContentModel
4949 static final int EMPTY
= 0;
4950 static final int ANY
= 1;
4951 static final int ELEMENT
= 2;
4952 static final int MIXED
= 3;
4960 ContentModel(int type
)
4970 * The EMPTY content model.
4972 class EmptyContentModel
4973 extends ContentModel
4978 super(ContentModel
.EMPTY
);
4986 * The ANY content model.
4988 class AnyContentModel
4989 extends ContentModel
4994 super(ContentModel
.ANY
);
5002 * An element content model.
5004 class ElementContentModel
5005 extends ContentModel
5008 LinkedList contentParticles
;
5010 String regex
; // regular expression cache
/**
 * Creates an element content model of type ContentModel.ELEMENT with an
 * empty list of content particles.
 */
ElementContentModel()
{
  super(ContentModel.ELEMENT);
  this.contentParticles = new LinkedList();
}
5018 void addContentParticle(ContentParticle cp
)
5020 contentParticles
.add(cp
);
5025 class ContentParticle
5030 Object content
; // Name (String) or ElementContentModel
5035 * A mixed content model.
5037 class MixedContentModel
5038 extends ContentModel
5041 private HashSet names
;
5045 super(ContentModel
.MIXED
);
5046 names
= new HashSet();
5049 void addName(String name
)
5054 boolean containsName(String name
)
5056 return names
.contains(name
);
5062 * An attribute definition.
5068 * The attribute type (CDATA, ID, etc).
5073 * The default value.
5078 * The value type (#FIXED, #IMPLIED, etc).
5080 final int valueType
;
5083 * The enumeration text.
5085 final String enumeration
;
5088 * The enumeration tokens.
5090 final HashSet values
;
5093 * Whether this attribute declaration occurred in the external subset.
5095 final boolean external
;
5097 AttributeDecl(String type
, String value
,
5098 int valueType
, String enumeration
,
5099 HashSet values
, boolean external
)
5103 this.valueType
= valueType
;
5104 this.enumeration
= enumeration
;
5105 this.values
= values
;
5106 this.external
= external
;
5112 * An XML input source.
5118 int line
= 1, markLine
;
5119 int column
, markColumn
;
5120 int offset
, markOffset
;
5121 final String publicId
, systemId
, name
;
5122 final boolean report
; // report start- and end-entity
5123 final boolean normalize
; // normalize CR, etc to LF
5127 UnicodeReader unicodeReader
;
5128 boolean initialized
;
5129 boolean encodingDetected
;
5130 String inputEncoding
;
5133 Input(InputStream in
, Reader reader
, String publicId
, String systemId
,
5134 String name
, String inputEncoding
, boolean report
,
5137 if (inputEncoding
== null)
5138 inputEncoding
= "UTF-8";
5139 this.inputEncoding
= inputEncoding
;
5140 this.publicId
= publicId
;
5141 this.systemId
= systemId
;
5143 this.report
= report
;
5144 this.normalize
= normalize
;
5148 throw new IllegalStateException("both byte and char streams "+
5151 in
= new CRLFInputStream(in
);
5152 in
= new BufferedInputStream(in
);
5157 this.reader
= normalize ?
new CRLFReader(reader
) : reader
;
5158 unicodeReader
= new UnicodeReader(this.reader
);
5160 initialized
= false;
5165 public int getCharacterOffset()
5170 public int getColumnNumber()
5175 public int getLineNumber()
5180 public String
getPublicId()
5185 public String
getSystemId()
5203 markOffset
= offset
;
5205 markColumn
= column
;
5206 if (unicodeReader
!= null)
5207 unicodeReader
.mark(len
);
5219 int ret
= (unicodeReader
!= null) ? unicodeReader
.read() : in
.read();
5221 (ret
== 0x0d || (xml11
&& (ret
== 0x85 || ret
== 0x2028))))
5223 // Normalize CR etc to LF
5240 int read(int[] b
, int off
, int len
)
5244 if (unicodeReader
!= null)
5246 ret
= unicodeReader
.read(b
, off
, len
);
5250 byte[] b2
= new byte[len
];
5251 ret
= in
.read(b2
, 0, len
);
5254 String s
= new String(b2
, 0, ret
, inputEncoding
);
5255 int[] c
= UnicodeReader
.toCodePointArray(s
);
5257 System
.arraycopy(c
, 0, b
, off
, ret
);
5263 for (int i
= 0; i
< ret
; i
++)
5267 (c
== 0x0d || (xml11
&& (c
== 0x85 || c
== 0x2028))))
5269 // Normalize CR etc to LF
5288 if (unicodeReader
!= null)
5289 unicodeReader
.reset();
5292 offset
= markOffset
;
5294 column
= markColumn
;
5297 // Detection of input encoding
5299 private static final int[] SIGNATURE_UCS_4_1234
=
5300 new int[] { 0x00, 0x00, 0x00, 0x3c };
5301 private static final int[] SIGNATURE_UCS_4_4321
=
5302 new int[] { 0x3c, 0x00, 0x00, 0x00 };
5303 private static final int[] SIGNATURE_UCS_4_2143
=
5304 new int[] { 0x00, 0x00, 0x3c, 0x00 };
5305 private static final int[] SIGNATURE_UCS_4_3412
=
5306 new int[] { 0x00, 0x3c, 0x00, 0x00 };
5307 private static final int[] SIGNATURE_UCS_2_12
=
5308 new int[] { 0xfe, 0xff };
5309 private static final int[] SIGNATURE_UCS_2_21
=
5310 new int[] { 0xff, 0xfe };
5311 private static final int[] SIGNATURE_UCS_2_12_NOBOM
=
5312 new int[] { 0x00, 0x3c, 0x00, 0x3f };
5313 private static final int[] SIGNATURE_UCS_2_21_NOBOM
=
5314 new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5315 private static final int[] SIGNATURE_UTF_8
=
5316 new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5317 private static final int[] SIGNATURE_UTF_8_BOM
=
5318 new int[] { 0xef, 0xbb, 0xbf };
5321 * Detect the input encoding.
5323 private void detectEncoding()
5326 int[] signature
= new int[4];
5328 for (int i
= 0; i
< 4; i
++)
5329 signature
[i
] = in
.read();
5333 if (equals(SIGNATURE_UCS_4_1234
, signature
))
5339 setInputEncoding("UTF-32BE");
5340 encodingDetected
= true;
5342 else if (equals(SIGNATURE_UCS_4_4321
, signature
))
5348 setInputEncoding("UTF-32LE");
5349 encodingDetected
= true;
5351 else if (equals(SIGNATURE_UCS_4_2143
, signature
) ||
5352 equals(SIGNATURE_UCS_4_3412
, signature
))
5353 throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5356 else if (equals(SIGNATURE_UCS_2_12
, signature
))
5360 setInputEncoding("UTF-16BE");
5361 encodingDetected
= true;
5363 else if (equals(SIGNATURE_UCS_2_21
, signature
))
5367 setInputEncoding("UTF-16LE");
5368 encodingDetected
= true;
5370 else if (equals(SIGNATURE_UCS_2_12_NOBOM
, signature
))
5372 //setInputEncoding("UTF-16BE");
5373 throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5375 else if (equals(SIGNATURE_UCS_2_21_NOBOM
, signature
))
5377 //setInputEncoding("UTF-16LE");
5378 throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5380 // ASCII-derived encodings
5381 else if (equals(SIGNATURE_UTF_8
, signature
))
5383 // UTF-8 input encoding implied, TextDecl
5385 else if (equals(SIGNATURE_UTF_8_BOM
, signature
))
5390 setInputEncoding("UTF-8");
5391 encodingDetected
= true;
5395 private static boolean equals(int[] b1
, int[] b2
)
5397 for (int i
= 0; i
< b1
.length
; i
++)
5405 void setInputEncoding(String encoding
)
5408 if (encoding
.equals(inputEncoding
))
5410 if ("UTF-16".equalsIgnoreCase(encoding
) &&
5411 inputEncoding
.startsWith("UTF-16"))
5413 if (encodingDetected
)
5414 throw new UnsupportedEncodingException("document is not in its " +
5415 "declared encoding " +
5418 inputEncoding
= encoding
;
5422 void finalizeEncoding()
5427 reader
= new BufferedReader(new InputStreamReader(in
, inputEncoding
));
5428 unicodeReader
= new UnicodeReader(reader
);