Merged revisions 143552,143554,143557,143560,143562,143564-143567,143570-143573,14357...
[official-gcc.git] / libjava / classpath / gnu / xml / stream / XMLParser.java
blob6072a9c730f4ecdbf5d6b94b92d381755614beca
1 /* XMLParser.java --
2 Copyright (C) 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version.
38 Partly derived from code which carried the following notice:
40 Copyright (c) 1997, 1998 by Microstar Software Ltd.
42 AElfred is free for both commercial and non-commercial use and
43 redistribution, provided that Microstar's copyright and disclaimer are
44 retained intact. You are free to modify AElfred for your own use and
45 to redistribute AElfred with your modifications, provided that the
46 modifications are clearly documented.
48 This program is distributed in the hope that it will be useful, but
49 WITHOUT ANY WARRANTY; without even the implied warranty of
50 merchantability or fitness for a particular purpose. Please use it AT
51 YOUR OWN RISK.
54 package gnu.xml.stream;
56 import gnu.java.lang.CPStringBuilder;
58 import java.io.BufferedInputStream;
59 import java.io.EOFException;
60 import java.io.File;
61 import java.io.FileOutputStream;
62 import java.io.FileWriter;
63 import java.io.InputStream;
64 import java.io.InputStreamReader;
65 import java.io.IOException;
66 import java.io.Reader;
67 import java.io.StringReader;
68 import java.io.UnsupportedEncodingException;
69 import java.net.MalformedURLException;
70 import java.net.URL;
71 import java.util.ArrayList;
72 import java.util.Collections;
73 import java.util.HashSet;
74 import java.util.Iterator;
75 import java.util.LinkedHashMap;
76 import java.util.LinkedList;
77 import java.util.Map;
78 import java.util.NoSuchElementException;
79 import java.util.StringTokenizer;
81 import javax.xml.XMLConstants;
82 import javax.xml.namespace.NamespaceContext;
83 import javax.xml.namespace.QName;
84 import javax.xml.stream.Location;
85 import javax.xml.stream.XMLInputFactory;
86 import javax.xml.stream.XMLReporter;
87 import javax.xml.stream.XMLResolver;
88 import javax.xml.stream.XMLStreamConstants;
89 import javax.xml.stream.XMLStreamException;
90 import javax.xml.stream.XMLStreamReader;
92 import gnu.java.net.CRLFInputStream;
93 import gnu.classpath.debug.TeeInputStream;
94 import gnu.classpath.debug.TeeReader;
/**
 * An XML parser.
 * This parser supports the following additional StAX properties:
 * <table>
 * <tr><td>gnu.xml.stream.stringInterning</td>
 * <td>Boolean</td>
 * <td>Indicates whether markup strings will be interned</td></tr>
 * <tr><td>gnu.xml.stream.xmlBase</td>
 * <td>Boolean</td>
 * <td>Indicates whether XML Base processing will be performed</td></tr>
 * <tr><td>gnu.xml.stream.baseURI</td>
 * <td>String</td>
 * <td>Returns the base URI of the current event</td></tr>
 * </table>
 *
 * @see <a href="http://www.w3.org/TR/REC-xml/">XML 1.0</a>
 * @see <a href="http://www.w3.org/TR/xml11/">XML 1.1</a>
 * @see <a href="http://www.w3.org/TR/REC-xml-names">Namespaces in XML 1.0</a>
 * @see <a href="http://www.w3.org/TR/xml-names11">Namespaces in XML 1.1</a>
 * @see <a href="http://www.w3.org/TR/xmlbase/">XML Base</a>
 *
 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 */
119 public class XMLParser
120 implements XMLStreamReader, NamespaceContext
123 // -- parser state machine states --
124 private static final int INIT = 0; // start state
125 private static final int PROLOG = 1; // in prolog
126 private static final int CONTENT = 2; // in content
127 private static final int EMPTY_ELEMENT = 3; // empty element state
128 private static final int MISC = 4; // in Misc (after root element)
130 // -- parameters for parsing literals --
131 private final static int LIT_ENTITY_REF = 2;
132 private final static int LIT_NORMALIZE = 4;
133 private final static int LIT_ATTRIBUTE = 8;
134 private final static int LIT_DISABLE_PE = 16;
135 private final static int LIT_DISABLE_CREF = 32;
136 private final static int LIT_DISABLE_EREF = 64;
137 private final static int LIT_PUBID = 256;
139 // -- types of attribute values --
140 final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
141 final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
142 final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
143 final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
144 final static int ATTRIBUTE_DEFAULT_FIXED = 34;
146 // -- additional event types --
147 final static int START_ENTITY = 50;
148 final static int END_ENTITY = 51;
/** The current input. */
private Input input;

/**
 * Stack of inputs representing XML general entities.
 * The input representing the XML input stream or reader is always the
 * first element in this stack.
 */
private LinkedList inputStack = new LinkedList();

/** Stack of start-entity events to be reported. */
private LinkedList startEntityStack = new LinkedList();

/** Stack of end-entity events to be reported. */
private LinkedList endEntityStack = new LinkedList();

/** Current parser state within the main state machine. */
private int state = INIT;

/** The (type of the) current event. */
private int event;

/**
 * The element name stack. The first element in this stack will be the
 * root element.
 */
private LinkedList stack = new LinkedList();

/**
 * Stack of namespace contexts. These are maps specifying prefix-to-URI
 * mappings. The first element in this stack is the most recent namespace
 * context (i.e. the other way around from the element name stack).
 */
private LinkedList namespaces = new LinkedList();

/**
 * The base-URI stack. This holds the base URI context for each element.
 * The first element in this stack is the most recent context (i.e. the
 * other way around from the element name stack).
 */
private LinkedList bases = new LinkedList();

/**
 * The list of attributes for the current element, in the order defined in
 * the XML stream.
 */
private ArrayList attrs = new ArrayList();

/** Buffer for text and character data. */
private StringBuffer buf = new StringBuffer();

/** Buffer for NMTOKEN strings (markup). */
private StringBuffer nmtokenBuf = new StringBuffer();

/** Buffer for string literals (e.g. attribute values). */
private StringBuffer literalBuf = new StringBuffer();

/** Temporary Unicode character buffer used during character data reads. */
private int[] tmpBuf = new int[1024];

/** The element content model for the current element. */
private ContentModel currentContentModel;

/**
 * The validation stack. This holds lists of the elements seen for each
 * element, in order to determine whether the names and order of these
 * elements match the content model for the element. The last entry in
 * this stack represents the current element.
 */
private LinkedList validationStack;

/**
 * These sets contain the IDs and the IDREFs seen in the document, to
 * ensure that IDs are unique and that each IDREF refers to an ID in the
 * document.
 */
private HashSet ids, idrefs;

/**
 * The target and data associated with the current processing instruction
 * event.
 */
private String piTarget, piData;

/** The XML version declared in the XML declaration. */
private String xmlVersion;

/** The encoding declared in the XML declaration. */
private String xmlEncoding;

/** The standalone value declared in the XML declaration. */
private Boolean xmlStandalone;

/** The document type definition. */
Doctype doctype;

/** State variables for determining parameter-entity expansion. */
private boolean expandPE, peIsError;

/** Whether this is a validating parser. */
private final boolean validating;

/** Whether strings representing markup will be interned. */
private final boolean stringInterning;

/**
 * If true, CDATA sections will be merged with adjacent text nodes into a
 * single event.
 */
private final boolean coalescing;

/**
 * Whether to replace general entity references with their replacement
 * text automatically during parsing.
 * Otherwise entity-reference events will be issued.
 */
private final boolean replaceERefs;

/** Whether to support external entities. */
private final boolean externalEntities;

/** Whether to support DTDs. */
private final boolean supportDTD;

/**
 * Whether to support XML namespaces. If true, namespace information will
 * be available. Otherwise namespaces will simply be reported as ordinary
 * attributes.
 */
private final boolean namespaceAware;

/**
 * Whether to support XML Base. If true, URIs specified in xml:base
 * attributes will be honoured when resolving external entities.
 */
private final boolean baseAware;

/**
 * Whether to report extended event types (START_ENTITY and END_ENTITY)
 * in addition to the standard event types. Used by the SAX parser.
 */
private final boolean extendedEventTypes;

/** The reporter to receive parsing warnings. */
final XMLReporter reporter;

/** Callback interface for resolving external entities. */
final XMLResolver resolver;
341 // -- Constants for testing the next kind of markup event --
342 private static final String TEST_START_ELEMENT = "<";
343 private static final String TEST_END_ELEMENT = "</";
344 private static final String TEST_COMMENT = "<!--";
345 private static final String TEST_PI = "<?";
346 private static final String TEST_CDATA = "<![CDATA[";
347 private static final String TEST_XML_DECL = "<?xml";
348 private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
349 private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
350 private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
351 private static final String TEST_ENTITY_DECL = "<!ENTITY";
352 private static final String TEST_NOTATION_DECL = "<!NOTATION";
353 private static final String TEST_KET = ">";
354 private static final String TEST_END_COMMENT = "--";
355 private static final String TEST_END_PI = "?>";
356 private static final String TEST_END_CDATA = "]]>";
/**
 * The general entities predefined by the XML specification, keyed by
 * entity name and mapped to their replacement text.
 */
private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();

static {
    // Insertion order is kept as in the original: amp, lt, gt, apos, quot.
    final String[][] predefined = {
        { "amp", "&" },
        { "lt", "<" },
        { "gt", ">" },
        { "apos", "'" },
        { "quot", "\"" },
    };
    for (int k = 0; k < predefined.length; k++) {
        PREDEFINED_ENTITIES.put(predefined[k][0], predefined[k][1]);
    }
}
/**
 * Creates a new XML parser for the given input stream.
 * This constructor should be used where possible, as it allows the
 * encoding of the XML data to be correctly determined from the stream.
 * <p>
 * If the system property <code>gnu.xml.debug.input</code> is set, the
 * stream is wrapped so that everything read is also copied to a temporary
 * file whose name starts with the property value and ends in
 * <code>.xml</code>.
 *
 * @param in the input stream
 * @param systemId the URL from which the input stream was retrieved
 * (necessary if there are external entities to be resolved)
 * @param validating if the parser is to be a validating parser
 * @param namespaceAware if the parser should support XML Namespaces
 * @param coalescing if CDATA sections should be merged into adjacent text
 * nodes
 * @param replaceERefs if entity references should be automatically
 * replaced by their replacement text (otherwise they will be reported as
 * entity-reference events)
 * @param externalEntities if external entities should be loaded
 * @param supportDTD if support for the XML DTD should be enabled
 * @param baseAware if the parser should support XML Base to resolve
 * external entities
 * @param stringInterning whether strings will be interned during parsing
 * @param extendedEventTypes whether START_ENTITY and END_ENTITY events
 * are reported in addition to the standard event types
 * @param reporter the reporter to receive warnings during processing
 * @param resolver the callback interface used to resolve external
 * entities
 */
public XMLParser(InputStream in, String systemId,
                 boolean validating,
                 boolean namespaceAware,
                 boolean coalescing,
                 boolean replaceERefs,
                 boolean externalEntities,
                 boolean supportDTD,
                 boolean baseAware,
                 boolean stringInterning,
                 boolean extendedEventTypes,
                 XMLReporter reporter,
                 XMLResolver resolver) {
    // Collaborators
    this.reporter = reporter;
    this.resolver = resolver;

    // Feature switches
    this.validating = validating;
    this.namespaceAware = namespaceAware;
    this.coalescing = coalescing;
    this.replaceERefs = replaceERefs;
    this.externalEntities = externalEntities;
    this.supportDTD = supportDTD;
    this.baseAware = baseAware;
    this.stringInterning = stringInterning;
    this.extendedEventTypes = extendedEventTypes;

    // Validation bookkeeping is only allocated for validating parsers.
    if (validating) {
        validationStack = new LinkedList();
        ids = new HashSet();
        idrefs = new HashSet();
    }

    // Optional debugging aid: tee the raw input into a temporary file.
    String dumpPrefix = System.getProperty("gnu.xml.debug.input");
    if (dumpPrefix != null) {
        try {
            File dump = File.createTempFile(dumpPrefix, ".xml");
            in = new TeeInputStream(in, new FileOutputStream(dump));
        } catch (IOException ioe) {
            RuntimeException wrapped = new RuntimeException();
            wrapped.initCause(ioe);
            throw wrapped;
        }
    }

    systemId = canonicalize(systemId);
    pushInput(new Input(in, null, null, systemId, null, null, false, true));
}
/**
 * Creates a new XML parser for the given character stream.
 * This constructor is only available for compatibility with the JAXP
 * APIs, which permit XML to be parsed from a character stream. Because
 * the encoding specified by the character stream may conflict with that
 * specified in the XML declaration, this method should be avoided where
 * possible.
 * <p>
 * If the system property <code>gnu.xml.debug.input</code> is set, the
 * reader is wrapped so that everything read is also copied to a temporary
 * file whose name starts with the property value and ends in
 * <code>.xml</code>.
 *
 * @param reader the character stream
 * @param systemId the URL from which the character stream was retrieved
 * (necessary if there are external entities to be resolved)
 * @param validating if the parser is to be a validating parser
 * @param namespaceAware if the parser should support XML Namespaces
 * @param coalescing if CDATA sections should be merged into adjacent text
 * nodes
 * @param replaceERefs if entity references should be automatically
 * replaced by their replacement text (otherwise they will be reported as
 * entity-reference events)
 * @param externalEntities if external entities should be loaded
 * @param supportDTD if support for the XML DTD should be enabled
 * @param baseAware if the parser should support XML Base to resolve
 * external entities
 * @param stringInterning whether strings will be interned during parsing
 * @param extendedEventTypes whether START_ENTITY and END_ENTITY events
 * are reported in addition to the standard event types
 * @param reporter the reporter to receive warnings during processing
 * @param resolver the callback interface used to resolve external
 * entities
 */
public XMLParser(Reader reader, String systemId,
                 boolean validating,
                 boolean namespaceAware,
                 boolean coalescing,
                 boolean replaceERefs,
                 boolean externalEntities,
                 boolean supportDTD,
                 boolean baseAware,
                 boolean stringInterning,
                 boolean extendedEventTypes,
                 XMLReporter reporter,
                 XMLResolver resolver) {
    // Collaborators
    this.reporter = reporter;
    this.resolver = resolver;

    // Feature switches
    this.validating = validating;
    this.namespaceAware = namespaceAware;
    this.coalescing = coalescing;
    this.replaceERefs = replaceERefs;
    this.externalEntities = externalEntities;
    this.supportDTD = supportDTD;
    this.baseAware = baseAware;
    this.stringInterning = stringInterning;
    this.extendedEventTypes = extendedEventTypes;

    // Validation bookkeeping is only allocated for validating parsers.
    if (validating) {
        validationStack = new LinkedList();
        ids = new HashSet();
        idrefs = new HashSet();
    }

    // Optional debugging aid: tee the characters read into a temporary file.
    String dumpPrefix = System.getProperty("gnu.xml.debug.input");
    if (dumpPrefix != null) {
        try {
            File dump = File.createTempFile(dumpPrefix, ".xml");
            reader = new TeeReader(reader, new FileWriter(dump));
        } catch (IOException ioe) {
            RuntimeException wrapped = new RuntimeException();
            wrapped.initCause(ioe);
            throw wrapped;
        }
    }

    systemId = canonicalize(systemId);
    pushInput(new Input(null, reader, null, systemId, null, null, false, true));
}
518 // -- NamespaceContext --
520 public String getNamespaceURI(String prefix)
522 if (XMLConstants.XML_NS_PREFIX.equals(prefix))
523 return XMLConstants.XML_NS_URI;
524 if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
525 return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
526 for (Iterator i = namespaces.iterator(); i.hasNext(); )
528 LinkedHashMap ctx = (LinkedHashMap) i.next();
529 String namespaceURI = (String) ctx.get(prefix);
530 if (namespaceURI != null)
531 return namespaceURI;
533 return null;
536 public String getPrefix(String namespaceURI)
538 if (XMLConstants.XML_NS_URI.equals(namespaceURI))
539 return XMLConstants.XML_NS_PREFIX;
540 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
541 return XMLConstants.XMLNS_ATTRIBUTE;
542 for (Iterator i = namespaces.iterator(); i.hasNext(); )
544 LinkedHashMap ctx = (LinkedHashMap) i.next();
545 if (ctx.containsValue(namespaceURI))
547 for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
549 Map.Entry entry = (Map.Entry) i.next();
550 String uri = (String) entry.getValue();
551 if (uri.equals(namespaceURI))
552 return (String) entry.getKey();
556 return null;
559 public Iterator getPrefixes(String namespaceURI)
561 if (XMLConstants.XML_NS_URI.equals(namespaceURI))
562 return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
563 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
564 return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
565 LinkedList acc = new LinkedList();
566 for (Iterator i = namespaces.iterator(); i.hasNext(); )
568 LinkedHashMap ctx = (LinkedHashMap) i.next();
569 if (ctx.containsValue(namespaceURI))
571 for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
573 Map.Entry entry = (Map.Entry) i.next();
574 String uri = (String) entry.getValue();
575 if (uri.equals(namespaceURI))
576 acc.add(entry.getKey());
580 return acc.iterator();
583 // -- XMLStreamReader --
585 public void close()
586 throws XMLStreamException
588 stack = null;
589 namespaces = null;
590 bases = null;
591 buf = null;
592 attrs = null;
593 doctype = null;
595 inputStack = null;
596 validationStack = null;
597 ids = null;
598 idrefs = null;
601 public NamespaceContext getNamespaceContext()
603 return this;
606 public int getAttributeCount()
608 return attrs.size();
611 public String getAttributeLocalName(int index)
613 Attribute a = (Attribute) attrs.get(index);
614 return a.localName;
617 public String getAttributeNamespace(int index)
619 String prefix = getAttributePrefix(index);
620 return getNamespaceURI(prefix);
623 public String getAttributePrefix(int index)
625 Attribute a = (Attribute) attrs.get(index);
626 return a.prefix;
629 public QName getAttributeName(int index)
631 Attribute a = (Attribute) attrs.get(index);
632 String namespaceURI = getNamespaceURI(a.prefix);
633 return new QName(namespaceURI, a.localName, a.prefix);
636 public String getAttributeType(int index)
638 Attribute a = (Attribute) attrs.get(index);
639 return a.type;
642 private String getAttributeType(String elementName, String attName)
644 if (doctype != null)
646 AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
647 if (att != null)
648 return att.type;
650 return "CDATA";
653 public String getAttributeValue(int index)
655 Attribute a = (Attribute) attrs.get(index);
656 return a.value;
659 public String getAttributeValue(String namespaceURI, String localName)
661 for (Iterator i = attrs.iterator(); i.hasNext(); )
663 Attribute a = (Attribute) i.next();
664 if (a.localName.equals(localName))
666 String uri = getNamespaceURI(a.prefix);
667 if ((uri == null && namespaceURI == null) ||
668 (uri != null && uri.equals(namespaceURI)))
669 return a.value;
672 return null;
675 boolean isAttributeDeclared(int index)
677 if (doctype == null)
678 return false;
679 Attribute a = (Attribute) attrs.get(index);
680 String qn = ("".equals(a.prefix)) ? a.localName :
681 a.prefix + ":" + a.localName;
682 String elementName = buf.toString();
683 return doctype.isAttributeDeclared(elementName, qn);
686 public String getCharacterEncodingScheme()
688 return xmlEncoding;
691 public String getElementText()
692 throws XMLStreamException
694 if (event != XMLStreamConstants.START_ELEMENT)
695 throw new XMLStreamException("current event must be START_ELEMENT");
696 CPStringBuilder elementText = new CPStringBuilder();
697 int depth = stack.size();
698 while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
700 switch (next())
702 case XMLStreamConstants.CHARACTERS:
703 case XMLStreamConstants.SPACE:
704 elementText.append(buf.toString());
707 return elementText.toString();
710 public String getEncoding()
712 return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
715 public int getEventType()
717 return event;
720 public String getLocalName()
722 switch (event)
724 case XMLStreamConstants.START_ELEMENT:
725 case XMLStreamConstants.END_ELEMENT:
726 String qName = buf.toString();
727 int ci = qName.indexOf(':');
728 String localName = (ci == -1) ? qName : qName.substring(ci + 1);
729 if (stringInterning)
730 localName = localName.intern();
731 return localName;
732 default:
733 return null;
737 public Location getLocation()
739 return input;
742 public QName getName()
744 switch (event)
746 case XMLStreamConstants.START_ELEMENT:
747 case XMLStreamConstants.END_ELEMENT:
748 String qName = buf.toString();
749 int ci = qName.indexOf(':');
750 String localName = (ci == -1) ? qName : qName.substring(ci + 1);
751 if (stringInterning)
752 localName = localName.intern();
753 String prefix = (ci == -1) ?
754 (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
755 qName.substring(0, ci);
756 if (stringInterning && prefix != null)
757 prefix = prefix.intern();
758 String namespaceURI = getNamespaceURI(prefix);
759 return new QName(namespaceURI, localName, prefix);
760 default:
761 return null;
765 public int getNamespaceCount()
767 if (!namespaceAware || namespaces.isEmpty())
768 return 0;
769 switch (event)
771 case XMLStreamConstants.START_ELEMENT:
772 case XMLStreamConstants.END_ELEMENT:
773 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
774 return ctx.size();
775 default:
776 return 0;
780 public String getNamespacePrefix(int index)
782 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
783 int count = 0;
784 for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
786 String prefix = (String) i.next();
787 if (count++ == index)
788 return prefix;
790 return null;
793 public String getNamespaceURI()
795 switch (event)
797 case XMLStreamConstants.START_ELEMENT:
798 case XMLStreamConstants.END_ELEMENT:
799 String qName = buf.toString();
800 int ci = qName.indexOf(':');
801 if (ci == -1)
802 return null;
803 String prefix = qName.substring(0, ci);
804 return getNamespaceURI(prefix);
805 default:
806 return null;
810 public String getNamespaceURI(int index)
812 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
813 int count = 0;
814 for (Iterator i = ctx.values().iterator(); i.hasNext(); )
816 String uri = (String) i.next();
817 if (count++ == index)
818 return uri;
820 return null;
823 public String getPIData()
825 return piData;
828 public String getPITarget()
830 return piTarget;
833 public String getPrefix()
835 switch (event)
837 case XMLStreamConstants.START_ELEMENT:
838 case XMLStreamConstants.END_ELEMENT:
839 String qName = buf.toString();
840 int ci = qName.indexOf(':');
841 String prefix = (ci == -1) ?
842 (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
843 qName.substring(0, ci);
844 if (stringInterning && prefix != null)
845 prefix = prefix.intern();
846 return prefix;
847 default:
848 return null;
852 public Object getProperty(String name)
853 throws IllegalArgumentException
855 if (name == null)
856 throw new IllegalArgumentException("name is null");
857 if (XMLInputFactory.ALLOCATOR.equals(name))
858 return null;
859 if (XMLInputFactory.IS_COALESCING.equals(name))
860 return coalescing ? Boolean.TRUE : Boolean.FALSE;
861 if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
862 return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
863 if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
864 return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
865 if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
866 return externalEntities ? Boolean.TRUE : Boolean.FALSE;
867 if (XMLInputFactory.IS_VALIDATING.equals(name))
868 return Boolean.FALSE;
869 if (XMLInputFactory.REPORTER.equals(name))
870 return reporter;
871 if (XMLInputFactory.RESOLVER.equals(name))
872 return resolver;
873 if (XMLInputFactory.SUPPORT_DTD.equals(name))
874 return supportDTD ? Boolean.TRUE : Boolean.FALSE;
875 if ("gnu.xml.stream.stringInterning".equals(name))
876 return stringInterning ? Boolean.TRUE : Boolean.FALSE;
877 if ("gnu.xml.stream.xmlBase".equals(name))
878 return baseAware ? Boolean.TRUE : Boolean.FALSE;
879 if ("gnu.xml.stream.baseURI".equals(name))
880 return getXMLBase();
881 return null;
884 public String getText()
886 return buf.toString();
889 public char[] getTextCharacters()
891 return buf.toString().toCharArray();
894 public int getTextCharacters(int sourceStart, char[] target,
895 int targetStart, int length)
896 throws XMLStreamException
898 length = Math.min(sourceStart + buf.length(), length);
899 int sourceEnd = sourceStart + length;
900 buf.getChars(sourceStart, sourceEnd, target, targetStart);
901 return length;
904 public int getTextLength()
906 return buf.length();
909 public int getTextStart()
911 return 0;
914 public String getVersion()
916 return (xmlVersion == null) ? "1.0" : xmlVersion;
919 public boolean hasName()
921 switch (event)
923 case XMLStreamConstants.START_ELEMENT:
924 case XMLStreamConstants.END_ELEMENT:
925 return true;
926 default:
927 return false;
931 public boolean hasText()
933 switch (event)
935 case XMLStreamConstants.CHARACTERS:
936 case XMLStreamConstants.SPACE:
937 return true;
938 default:
939 return false;
943 public boolean isAttributeSpecified(int index)
945 Attribute a = (Attribute) attrs.get(index);
946 return a.specified;
949 public boolean isCharacters()
951 return (event == XMLStreamConstants.CHARACTERS);
954 public boolean isEndElement()
956 return (event == XMLStreamConstants.END_ELEMENT);
959 public boolean isStandalone()
961 return Boolean.TRUE.equals(xmlStandalone);
964 public boolean isStartElement()
966 return (event == XMLStreamConstants.START_ELEMENT);
969 public boolean isWhiteSpace()
971 return (event == XMLStreamConstants.SPACE);
974 public int nextTag()
975 throws XMLStreamException
979 switch (next())
981 case XMLStreamConstants.START_ELEMENT:
982 case XMLStreamConstants.END_ELEMENT:
983 case XMLStreamConstants.CHARACTERS:
984 case XMLStreamConstants.SPACE:
985 case XMLStreamConstants.COMMENT:
986 case XMLStreamConstants.PROCESSING_INSTRUCTION:
987 break;
988 default:
989 throw new XMLStreamException("Unexpected event type: " + event);
992 while (event != XMLStreamConstants.START_ELEMENT &&
993 event != XMLStreamConstants.END_ELEMENT);
994 return event;
997 public void require(int type, String namespaceURI, String localName)
998 throws XMLStreamException
1000 if (event != type)
1001 throw new XMLStreamException("Current event type is " + event);
1002 if (event == XMLStreamConstants.START_ELEMENT ||
1003 event == XMLStreamConstants.END_ELEMENT)
1005 String ln = getLocalName();
1006 if (!ln.equals(localName))
1007 throw new XMLStreamException("Current local-name is " + ln);
1008 String uri = getNamespaceURI();
1009 if ((uri == null && namespaceURI != null) ||
1010 (uri != null && !uri.equals(namespaceURI)))
1011 throw new XMLStreamException("Current namespace URI is " + uri);
1015 public boolean standaloneSet()
1017 return (xmlStandalone != null);
1020 public boolean hasNext()
1021 throws XMLStreamException
1023 return (event != XMLStreamConstants.END_DOCUMENT && event != -1);
/**
 * Advances the parser by one event and returns its type; the same value
 * is stored in the event field.
 * What is read depends on the state field (INIT, PROLOG, CONTENT,
 * EMPTY_ELEMENT or MISC); any other state yields -1.
 * An IOException raised while reading is rethrown as an
 * XMLStreamException carrying it as the cause.
 */
1026 public int next()
1027 throws XMLStreamException
// The previous call reported END_ELEMENT: the namespace and base-URI
// contexts at the head of their stacks are discarded only now, on the
// following call.
1029 if (event == XMLStreamConstants.END_ELEMENT)
1031 // Pop namespace context
1032 if (namespaceAware && !namespaces.isEmpty())
1033 namespaces.removeFirst();
1034 // Pop base context
1035 if (baseAware && !bases.isEmpty())
1036 bases.removeFirst();
// Queued entity boundary events are delivered before the state machine
// runs. The entity name is exposed through buf. When extended event types
// are disabled the event is not returned; next() recurses instead.
1038 if (!startEntityStack.isEmpty())
1040 String entityName = (String) startEntityStack.removeFirst();
1041 buf.setLength(0);
1042 buf.append(entityName);
1043 event = START_ENTITY;
1044 return extendedEventTypes ? event : next();
1046 else if (!endEntityStack.isEmpty())
1048 String entityName = (String) endEntityStack.removeFirst();
1049 buf.setLength(0);
1050 buf.append(entityName);
1051 event = END_ENTITY;
1052 return extendedEventTypes ? event : next();
// The current input is initialised on first use.
1056 if (!input.initialized)
1057 input.init();
1058 switch (state)
// CONTENT: the longer markup openers ("</", "<!--", "<?", "<![CDATA[")
// are tried before the bare "<" that starts an element.
1060 case CONTENT:
1061 if (tryRead(TEST_END_ELEMENT))
1063 readEndElement();
// An empty element stack after the end tag moves the parser to MISC.
1064 if (stack.isEmpty())
1065 state = MISC;
1066 event = XMLStreamConstants.END_ELEMENT;
1068 else if (tryRead(TEST_COMMENT))
1070 readComment(false);
1071 event = XMLStreamConstants.COMMENT;
1073 else if (tryRead(TEST_PI))
1075 readPI(false);
1076 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1078 else if (tryRead(TEST_CDATA))
1080 readCDSect();
1081 event = XMLStreamConstants.CDATA;
1083 else if (tryRead(TEST_START_ELEMENT))
// readStartElement() returns the state to continue in.
1085 state = readStartElement();
1086 event = XMLStreamConstants.START_ELEMENT;
1088 else
1090 // Check for character reference or predefined entity
// mark/reset is used to look at up to two characters without consuming them.
1091 mark(8);
1092 int c = readCh();
1093 if (c == 0x26) // '&'
1095 c = readCh();
1096 if (c == 0x23) // '#'
// "&#": rewind and let readCharData handle it as character data.
1098 reset();
1099 event = readCharData(null);
1101 else
1103 // entity reference
1104 reset();
1105 readCh(); // &
1106 readReference();
// The reference name is taken from buf after readReference().
1107 String ref = buf.toString();
1108 String text = (String) PREDEFINED_ENTITIES.get(ref);
1109 if (text != null)
// Predefined entity: its replacement text is passed to readCharData.
1111 event = readCharData(text);
1113 else if (replaceERefs && !isUnparsedEntity(ref))
1115 // this will report a start-entity event
1116 boolean external = false;
1117 if (doctype != null)
1119 Object entity = doctype.getEntity(ref);
1120 if (entity instanceof ExternalIds)
1121 external = true;
// After expanding the entity, recurse to obtain the event to report.
1123 expandEntity(ref, false, external);
1124 event = next();
1126 else
// Not replaced: report the reference itself.
1128 event = XMLStreamConstants.ENTITY_REFERENCE;
1132 else
// Not a reference: rewind and read plain character data.
1134 reset();
1135 event = readCharData(null);
1136 if (validating && doctype != null)
1137 validatePCData(buf.toString());
1140 break;
// EMPTY_ELEMENT: report an END_ELEMENT without reading input. The last
// name on the element stack is popped and exposed through buf.
1141 case EMPTY_ELEMENT:
1142 String elementName = (String) stack.removeLast();
1143 buf.setLength(0);
1144 buf.append(elementName);
1145 state = stack.isEmpty() ? MISC : CONTENT;
1146 event = XMLStreamConstants.END_ELEMENT;
1147 if (validating && doctype != null)
1148 endElementValidationHook();
1149 break;
// INIT: read the optional XML declaration, then report START_DOCUMENT and
// move to PROLOG.
1150 case INIT: // XMLDecl?
1151 if (tryRead(TEST_XML_DECL))
1152 readXMLDecl();
1153 input.finalizeEncoding();
1154 event = XMLStreamConstants.START_DOCUMENT;
1155 state = PROLOG;
1156 break;
1157 case PROLOG: // Misc* (doctypedecl Misc*)?
1158 skipWhitespace();
// A DOCTYPE declaration is only tried while doctype is still null.
1159 if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
1161 readDoctypeDecl();
1162 event = XMLStreamConstants.DTD;
1164 else if (tryRead(TEST_COMMENT))
1166 readComment(false);
1167 event = XMLStreamConstants.COMMENT;
1169 else if (tryRead(TEST_PI))
1171 readPI(false);
1172 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1174 else if (tryRead(TEST_START_ELEMENT))
1176 state = readStartElement();
1177 event = XMLStreamConstants.START_ELEMENT;
1179 else
// Anything else in the prolog is reported through error().
1181 int c = readCh();
1182 error("no root element: U+" + Integer.toHexString(c));
1184 break;
1185 case MISC: // Comment | PI | S
1186 skipWhitespace();
1187 if (tryRead(TEST_COMMENT))
1189 readComment(false);
1190 event = XMLStreamConstants.COMMENT;
1192 else if (tryRead(TEST_PI))
1194 readPI(false);
1195 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1197 else
// Calling next() again once END_DOCUMENT has been reported is an error.
1199 if (event == XMLStreamConstants.END_DOCUMENT)
1200 throw new NoSuchElementException();
// Only end of input (-1) is acceptable here.
1201 int c = readCh();
1202 if (c != -1)
1203 error("Only comments and PIs may appear after " +
1204 "the root element");
1205 event = XMLStreamConstants.END_DOCUMENT;
1207 break;
1208 default:
1209 event = -1;
1211 return event;
// The IOException is attached as the cause; the wrapper has no message.
1213 catch (IOException e)
1215 XMLStreamException e2 = new XMLStreamException();
1216 e2.initCause(e);
1217 throw e2;
1221 // package private
1224 * Returns the current element name.
1226 String getCurrentElement()
1228 return (String) stack.getLast();
1231 // private
1233 private void mark(int limit)
1234 throws IOException
1236 input.mark(limit);
1239 private void reset()
1240 throws IOException
1242 input.reset();
1245 private int read()
1246 throws IOException
1248 return input.read();
1251 private int read(int[] b, int off, int len)
1252 throws IOException
1254 return input.read(b, off, len);
1258 * Parsed character read.
1260 private int readCh()
1261 throws IOException, XMLStreamException
1263 int c = read();
1264 if (expandPE && c == 0x25) // '%'
1266 if (peIsError)
1267 error("PE reference within decl in internal subset.");
1268 expandPEReference();
1269 return readCh();
1271 return c;
1275 * Reads the next character, ensuring it is the character specified.
1276 * @param delim the character to match
1277 * @exception XMLStreamException if the next character is not the
1278 * specified one
1280 private void require(char delim)
1281 throws IOException, XMLStreamException
1283 mark(1);
1284 int c = readCh();
1285 if (delim != c)
1287 reset();
1288 error("required character (got U+" + Integer.toHexString(c) + ")",
1289 new Character(delim));
1294 * Reads the next few characters, ensuring they match the string specified.
1295 * @param delim the string to match
1296 * @exception XMLStreamException if the next characters do not match the
1297 * specified string
1299 private void require(String delim)
1300 throws IOException, XMLStreamException
1302 char[] chars = delim.toCharArray();
1303 int len = chars.length;
1304 mark(len);
1305 int off = 0;
1308 int l2 = read(tmpBuf, off, len - off);
1309 if (l2 == -1)
1311 reset();
1312 error("EOF before required string", delim);
1314 off += l2;
1316 while (off < len);
1317 for (int i = 0; i < chars.length; i++)
1319 if (chars[i] != tmpBuf[i])
1321 reset();
1322 error("required string", delim);
1328 * Try to read a single character. On failure, reset the stream.
1329 * @param delim the character to test
1330 * @return true if the character matched delim, false otherwise.
1332 private boolean tryRead(char delim)
1333 throws IOException, XMLStreamException
1335 mark(1);
1336 int c = readCh();
1337 if (delim != c)
1339 reset();
1340 return false;
1342 return true;
1346 * Tries to read the specified characters.
1347 * If successful, the stream is positioned after the last character,
1348 * otherwise it is reset.
1349 * @param test the string to test
1350 * @return true if the characters matched the test string, false otherwise.
1352 private boolean tryRead(String test)
1353 throws IOException
1355 char[] chars = test.toCharArray();
1356 int len = chars.length;
1357 mark(len);
1358 int count = 0;
1359 int l2 = read(tmpBuf, 0, len);
1360 if (l2 == -1)
1362 reset();
1363 return false;
1365 count += l2;
1366 // check the characters we received first before doing additional reads
1367 for (int i = 0; i < count; i++)
1369 if (chars[i] != tmpBuf[i])
1371 reset();
1372 return false;
1375 while (count < len)
1377 // force read
1378 int c = read();
1379 if (c == -1)
1381 reset();
1382 return false;
1384 tmpBuf[count] = (char) c;
1385 // check each character as it is read
1386 if (chars[count] != tmpBuf[count])
1388 reset();
1389 return false;
1391 count++;
1393 return true;
1397 * Reads characters until the specified test string is encountered.
1398 * @param delim the string delimiting the end of the characters
1400 private void readUntil(String delim)
1401 throws IOException, XMLStreamException
1403 int startLine = input.line;
1406 while (!tryRead(delim))
1408 int c = readCh();
1409 if (c == -1)
1410 throw new EOFException();
1411 else if (input.xml11)
1413 if (!isXML11Char(c) || isXML11RestrictedChar(c))
1414 error("illegal XML 1.1 character",
1415 "U+" + Integer.toHexString(c));
1417 else if (!isChar(c))
1418 error("illegal XML character",
1419 "U+" + Integer.toHexString(c));
1420 buf.append(Character.toChars(c));
1423 catch (EOFException e)
1425 error("end of input while looking for delimiter "+
1426 "(started on line " + startLine + ')', delim);
1431 * Reads any whitespace characters.
1432 * @return true if whitespace characters were read, false otherwise
1434 private boolean tryWhitespace()
1435 throws IOException, XMLStreamException
1437 boolean white;
1438 boolean ret = false;
1441 mark(1);
1442 int c = readCh();
1443 while (c == -1 && inputStack.size() > 1)
1445 popInput();
1446 c = readCh();
1448 white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1449 if (white)
1450 ret = true;
1452 while (white);
1453 reset();
1454 return ret;
1458 * Skip over any whitespace characters.
1460 private void skipWhitespace()
1461 throws IOException, XMLStreamException
1463 boolean white;
1466 mark(1);
1467 int c = readCh();
1468 while (c == -1 && inputStack.size() > 1)
1470 popInput();
1471 c = readCh();
1473 white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1475 while (white);
1476 reset();
1480 * Try to read as many whitespace characters as are available.
1481 * @exception XMLStreamException if no whitespace characters were seen
1483 private void requireWhitespace()
1484 throws IOException, XMLStreamException
1486 if (!tryWhitespace())
1487 error("whitespace required");
1491 * Returns the current base URI for resolving external entities.
1493 String getXMLBase()
1495 if (baseAware)
1497 for (Iterator i = bases.iterator(); i.hasNext(); )
1499 String base = (String) i.next();
1500 if (base != null)
1501 return base;
1504 return input.systemId;
1508 * Push the specified text input source.
1510 private void pushInput(String name, String text, boolean report,
1511 boolean normalize)
1512 throws IOException, XMLStreamException
1514 // Check for recursion
1515 if (name != null && !"".equals(name))
1517 for (Iterator i = inputStack.iterator(); i.hasNext(); )
1519 Input ctx = (Input) i.next();
1520 if (name.equals(ctx.name))
1521 error("entities may not be self-recursive", name);
1524 else
1525 report = false;
1526 pushInput(new Input(null, new StringReader(text), input.publicId,
1527 input.systemId, name, input.inputEncoding, report,
1528 normalize));
1532 * Push the specified external input source.
1534 private void pushInput(String name, ExternalIds ids, boolean report,
1535 boolean normalize)
1536 throws IOException, XMLStreamException
1538 if (!externalEntities)
1539 return;
1540 String url = canonicalize(absolutize(input.systemId, ids.systemId));
1541 // Check for recursion
1542 for (Iterator i = inputStack.iterator(); i.hasNext(); )
1544 Input ctx = (Input) i.next();
1545 if (url.equals(ctx.systemId))
1546 error("entities may not be self-recursive", url);
1547 if (name != null && !"".equals(name) && name.equals(ctx.name))
1548 error("entities may not be self-recursive", name);
1550 if (name == null || "".equals(name))
1551 report = false;
1552 InputStream in = null;
1553 if (resolver != null)
1555 Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1556 null);
1557 if (obj instanceof InputStream)
1558 in = (InputStream) obj;
1560 if (in == null)
1561 in = resolve(url);
1562 if (in == null)
1563 error("unable to resolve external entity",
1564 (ids.systemId != null) ? ids.systemId : ids.publicId);
1565 pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1566 normalize));
1567 input.init();
1568 if (tryRead(TEST_XML_DECL))
1569 readTextDecl();
1570 input.finalizeEncoding();
1574 * Push the specified input source (general entity) onto the input stack.
1576 private void pushInput(Input input)
1578 if (input.report)
1579 startEntityStack.addFirst(input.name);
1580 inputStack.addLast(input);
1581 if (this.input != null)
1582 input.xml11 = this.input.xml11;
1583 this.input = input;
1587 * Returns a canonicalized version of the specified URL.
1588 * This is largely to work around a problem with the specification of
1589 * file URLs.
1591 static String canonicalize(String url)
1593 if (url == null)
1594 return null;
1595 if (url.startsWith("file:") && !url.startsWith("file://"))
1596 url = "file://" + url.substring(5);
1597 return url;
1601 * "Absolutize" a URL. This resolves a relative URL into an absolute one.
1602 * @param base the current base URL
1603 * @param href the (absolute or relative) URL to resolve
1605 public static String absolutize(String base, String href)
1607 if (href == null)
1608 return null;
1609 int ci = href.indexOf(':');
1610 if (ci > 1 && isURLScheme(href.substring(0, ci)))
1612 // href is absolute already
1613 return href;
1615 if (base == null)
1616 base = "";
1617 else
1619 int i = base.lastIndexOf('/');
1620 if (i != -1)
1621 base = base.substring(0, i + 1);
1622 else
1623 base = "";
1625 if ("".equals(base))
1627 // assume file URL relative to current directory
1628 base = System.getProperty("user.dir");
1629 if (base.charAt(0) == '/')
1630 base = base.substring(1);
1631 base = "file:///" + base.replace(File.separatorChar, '/');
1632 if (!base.endsWith("/"))
1633 base += "/";
1635 // We can't use java.net.URL here to do the parsing, as it searches for
1636 // a protocol handler. A protocol handler may not be registered for the
1637 // URL scheme here. Do it manually.
1639 // Set aside scheme and host portion of base URL
1640 String basePrefix = null;
1641 ci = base.indexOf(':');
1642 if (ci > 1 && isURLScheme(base.substring(0, ci)))
1644 if (base.length() > (ci + 3) &&
1645 base.charAt(ci + 1) == '/' &&
1646 base.charAt(ci + 2) == '/')
1648 int si = base.indexOf('/', ci + 3);
1649 if (si == -1)
1650 base = null;
1651 else
1653 basePrefix = base.substring(0, si);
1654 base = base.substring(si);
1657 else
1658 base = null;
1660 if (base == null) // unknown or malformed base URL, use href
1661 return href;
1662 if (href.startsWith("/")) // absolute href pathname
1663 return (basePrefix == null) ? href : basePrefix + href;
1664 // relative href pathname
1665 if (!base.endsWith("/"))
1667 int lsi = base.lastIndexOf('/');
1668 if (lsi == -1)
1669 base = "/";
1670 else
1671 base = base.substring(0, lsi + 1);
1673 while (href.startsWith("../") || href.startsWith("./"))
1675 if (href.startsWith("../"))
1677 // strip last path component from base
1678 int lsi = base.lastIndexOf('/', base.length() - 2);
1679 if (lsi > -1)
1680 base = base.substring(0, lsi + 1);
1681 href = href.substring(3); // strip ../ prefix
1683 else
1685 href = href.substring(2); // strip ./ prefix
1688 return (basePrefix == null) ? base + href : basePrefix + base + href;
1692 * Indicates whether the specified characters match the scheme portion of
1693 * a URL.
1694 * @see RFC 1738 section 2.1
1696 private static boolean isURLScheme(String text)
1698 int len = text.length();
1699 for (int i = 0; i < len; i++)
1701 char c = text.charAt(i);
1702 if (c == '+' || c == '.' || c == '-')
1703 continue;
1704 if (c < 65 || (c > 90 && c < 97) || c > 122)
1705 return false;
1707 return true;
1711 * Returns an input stream for the given URL.
1713 static InputStream resolve(String url)
1714 throws IOException
1718 return new URL(url).openStream();
1720 catch (MalformedURLException e)
1722 return null;
1724 catch (IOException e)
1726 IOException e2 = new IOException("error resolving " + url);
1727 e2.initCause(e);
1728 throw e2;
1733 * Pops the current input source (general entity) off the stack.
1735 private void popInput()
1737 Input old = (Input) inputStack.removeLast();
1738 if (old.report)
1739 endEntityStack.addFirst(old.name);
1740 input = (Input) inputStack.getLast();
1744 * Parse an entity text declaration.
1746 private void readTextDecl()
1747 throws IOException, XMLStreamException
1749 final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1750 requireWhitespace();
1751 if (tryRead("version"))
1753 readEq();
1754 String v = readLiteral(flags, false);
1755 if ("1.0".equals(v))
1756 input.xml11 = false;
1757 else if ("1.1".equals(v))
1759 Input i1 = (Input) inputStack.getFirst();
1760 if (!i1.xml11)
1761 error("external entity specifies later version number");
1762 input.xml11 = true;
1764 else
1765 throw new XMLStreamException("illegal XML version: " + v);
1766 requireWhitespace();
1768 require("encoding");
1769 readEq();
1770 String enc = readLiteral(flags, false);
1771 skipWhitespace();
1772 require("?>");
1773 input.setInputEncoding(enc);
1777 * Parse the XML declaration.
1779 private void readXMLDecl()
1780 throws IOException, XMLStreamException
1782 final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1784 requireWhitespace();
1785 require("version");
1786 readEq();
1787 xmlVersion = readLiteral(flags, false);
1788 if ("1.0".equals(xmlVersion))
1789 input.xml11 = false;
1790 else if ("1.1".equals(xmlVersion))
1791 input.xml11 = true;
1792 else
1793 throw new XMLStreamException("illegal XML version: " + xmlVersion);
1795 boolean white = tryWhitespace();
1797 if (tryRead("encoding"))
1799 if (!white)
1800 error("whitespace required before 'encoding='");
1801 readEq();
1802 xmlEncoding = readLiteral(flags, false);
1803 white = tryWhitespace();
1806 if (tryRead("standalone"))
1808 if (!white)
1809 error("whitespace required before 'standalone='");
1810 readEq();
1811 String standalone = readLiteral(flags, false);
1812 if ("yes".equals(standalone))
1813 xmlStandalone = Boolean.TRUE;
1814 else if ("no".equals(standalone))
1815 xmlStandalone = Boolean.FALSE;
1816 else
1817 error("standalone flag must be 'yes' or 'no'", standalone);
1820 skipWhitespace();
1821 require("?>");
1822 if (xmlEncoding != null)
1823 input.setInputEncoding(xmlEncoding);
1827 * Parse the DOCTYPE declaration.
1829 private void readDoctypeDecl()
1830 throws IOException, XMLStreamException
1832 if (!supportDTD)
1833 error("parser was configured not to support DTDs");
1834 requireWhitespace();
1835 String rootName = readNmtoken(true);
1836 skipWhitespace();
1837 ExternalIds ids = readExternalIds(false, true);
1838 doctype =
1839 this.new Doctype(rootName, ids.publicId, ids.systemId);
1841 // Parse internal subset first
1842 skipWhitespace();
1843 if (tryRead('['))
1845 while (true)
1847 expandPE = true;
1848 skipWhitespace();
1849 expandPE = false;
1850 if (tryRead(']'))
1851 break;
1852 else
1853 readMarkupdecl(false);
1856 skipWhitespace();
1857 require('>');
1859 // Parse external subset
1860 if (ids.systemId != null && externalEntities)
1862 pushInput("", ">", false, false);
1863 pushInput("[dtd]", ids, true, true);
1864 // loop until we get back to ">"
1865 while (true)
1867 expandPE = true;
1868 skipWhitespace();
1869 expandPE = false;
1870 mark(1);
1871 int c = readCh();
1872 if (c == 0x3e) // '>'
1873 break;
1874 else if (c == -1)
1875 popInput();
1876 else
1878 reset();
1879 expandPE = true;
1880 readMarkupdecl(true);
1881 expandPE = true;
1884 if (inputStack.size() != 2)
1885 error("external subset has unmatched '>'");
1886 popInput();
1888 checkDoctype();
1889 if (validating)
1890 validateDoctype();
1892 // Make rootName available for reading
1893 buf.setLength(0);
1894 buf.append(rootName);
1898 * Checks the well-formedness of the DTD.
1900 private void checkDoctype()
1901 throws XMLStreamException
1903 // TODO check entity recursion
1907 * Parse the markupdecl production.
1909 private void readMarkupdecl(boolean inExternalSubset)
1910 throws IOException, XMLStreamException
1912 boolean saved = expandPE;
1913 mark(1);
1914 require('<');
1915 reset();
1916 expandPE = false;
1917 if (tryRead(TEST_ELEMENT_DECL))
1919 expandPE = saved;
1920 readElementDecl();
1922 else if (tryRead(TEST_ATTLIST_DECL))
1924 expandPE = saved;
1925 readAttlistDecl();
1927 else if (tryRead(TEST_ENTITY_DECL))
1929 expandPE = saved;
1930 readEntityDecl(inExternalSubset);
1932 else if (tryRead(TEST_NOTATION_DECL))
1934 expandPE = saved;
1935 readNotationDecl(inExternalSubset);
1937 else if (tryRead(TEST_PI))
1939 readPI(true);
1940 expandPE = saved;
1942 else if (tryRead(TEST_COMMENT))
1944 readComment(true);
1945 expandPE = saved;
1947 else if (tryRead("<!["))
1949 // conditional section
1950 expandPE = saved;
1951 if (inputStack.size() < 2)
1952 error("conditional sections illegal in internal subset");
1953 skipWhitespace();
1954 if (tryRead("INCLUDE"))
1956 skipWhitespace();
1957 require('[');
1958 skipWhitespace();
1959 while (!tryRead("]]>"))
1961 readMarkupdecl(inExternalSubset);
1962 skipWhitespace();
1965 else if (tryRead("IGNORE"))
1967 skipWhitespace();
1968 require('[');
1969 expandPE = false;
1970 for (int nesting = 1; nesting > 0; )
1972 int c = readCh();
1973 switch (c)
1975 case 0x3c: // '<'
1976 if (tryRead("!["))
1977 nesting++;
1978 break;
1979 case 0x5d: // ']'
1980 if (tryRead("]>"))
1981 nesting--;
1982 break;
1983 case -1:
1984 throw new EOFException();
1987 expandPE = saved;
1989 else
1990 error("conditional section must begin with INCLUDE or IGNORE");
1992 else
1993 error("expected markup declaration");
1997 * Parse the elementdecl production.
1999 private void readElementDecl()
2000 throws IOException, XMLStreamException
2002 requireWhitespace();
2003 boolean saved = expandPE;
2004 expandPE = (inputStack.size() > 1);
2005 String name = readNmtoken(true);
2006 expandPE = saved;
2007 requireWhitespace();
2008 readContentspec(name);
2009 skipWhitespace();
2010 require('>');
2014 * Parse the contentspec production.
2016 private void readContentspec(String elementName)
2017 throws IOException, XMLStreamException
2019 if (tryRead("EMPTY"))
2020 doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
2021 else if (tryRead("ANY"))
2022 doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
2023 else
2025 ContentModel model;
2026 CPStringBuilder acc = new CPStringBuilder();
2027 require('(');
2028 acc.append('(');
2029 skipWhitespace();
2030 if (tryRead("#PCDATA"))
2032 // mixed content
2033 acc.append("#PCDATA");
2034 MixedContentModel mm = new MixedContentModel();
2035 model = mm;
2036 skipWhitespace();
2037 if (tryRead(')'))
2039 acc.append(")");
2040 if (tryRead('*'))
2042 mm.min = 0;
2043 mm.max = -1;
2046 else
2048 while (!tryRead(")"))
2050 require('|');
2051 acc.append('|');
2052 skipWhitespace();
2053 String name = readNmtoken(true);
2054 acc.append(name);
2055 mm.addName(name);
2056 skipWhitespace();
2058 require('*');
2059 acc.append(")*");
2060 mm.min = 0;
2061 mm.max = -1;
2064 else
2065 model = readElements(acc);
2066 doctype.addElementDecl(elementName, acc.toString(), model);
2071 * Parses an element content model.
2073 private ElementContentModel readElements(CPStringBuilder acc)
2074 throws IOException, XMLStreamException
2076 int separator;
2077 ElementContentModel model = new ElementContentModel();
2079 // Parse first content particle
2080 skipWhitespace();
2081 model.addContentParticle(readContentParticle(acc));
2082 // End or separator
2083 skipWhitespace();
2084 int c = readCh();
2085 switch (c)
2087 case 0x29: // ')'
2088 acc.append(')');
2089 mark(1);
2090 c = readCh();
2091 switch (c)
2093 case 0x3f: // '?'
2094 acc.append('?');
2095 model.min = 0;
2096 model.max = 1;
2097 break;
2098 case 0x2a: // '*'
2099 acc.append('*');
2100 model.min = 0;
2101 model.max = -1;
2102 break;
2103 case 0x2b: // '+'
2104 acc.append('+');
2105 model.min = 1;
2106 model.max = -1;
2107 break;
2108 default:
2109 reset();
2111 return model; // done
2112 case 0x7c: // '|'
2113 model.or = true;
2114 // fall through
2115 case 0x2c: // ','
2116 separator = c;
2117 acc.append(Character.toChars(c));
2118 break;
2119 default:
2120 error("bad separator in content model",
2121 "U+" + Integer.toHexString(c));
2122 return model;
2124 // Parse subsequent content particles
2125 while (true)
2127 skipWhitespace();
2128 model.addContentParticle(readContentParticle(acc));
2129 skipWhitespace();
2130 c = readCh();
2131 if (c == 0x29) // ')'
2133 acc.append(')');
2134 break;
2136 else if (c != separator)
2138 error("bad separator in content model",
2139 "U+" + Integer.toHexString(c));
2140 return model;
2142 else
2143 acc.append(c);
2145 // Check for occurrence indicator
2146 mark(1);
2147 c = readCh();
2148 switch (c)
2150 case 0x3f: // '?'
2151 acc.append('?');
2152 model.min = 0;
2153 model.max = 1;
2154 break;
2155 case 0x2a: // '*'
2156 acc.append('*');
2157 model.min = 0;
2158 model.max = -1;
2159 break;
2160 case 0x2b: // '+'
2161 acc.append('+');
2162 model.min = 1;
2163 model.max = -1;
2164 break;
2165 default:
2166 reset();
2168 return model;
2172 * Parse a cp production.
2174 private ContentParticle readContentParticle(CPStringBuilder acc)
2175 throws IOException, XMLStreamException
2177 ContentParticle cp = new ContentParticle();
2178 if (tryRead('('))
2180 acc.append('(');
2181 cp.content = readElements(acc);
2183 else
2185 String name = readNmtoken(true);
2186 acc.append(name);
2187 cp.content = name;
2188 mark(1);
2189 int c = readCh();
2190 switch (c)
2192 case 0x3f: // '?'
2193 acc.append('?');
2194 cp.min = 0;
2195 cp.max = 1;
2196 break;
2197 case 0x2a: // '*'
2198 acc.append('*');
2199 cp.min = 0;
2200 cp.max = -1;
2201 break;
2202 case 0x2b: // '+'
2203 acc.append('+');
2204 cp.min = 1;
2205 cp.max = -1;
2206 break;
2207 default:
2208 reset();
2211 return cp;
2215 * Parse an attribute-list definition.
2217 private void readAttlistDecl()
2218 throws IOException, XMLStreamException
2220 requireWhitespace();
2221 boolean saved = expandPE;
2222 expandPE = (inputStack.size() > 1);
2223 String elementName = readNmtoken(true);
2224 expandPE = saved;
2225 boolean white = tryWhitespace();
2226 while (!tryRead('>'))
2228 if (!white)
2229 error("whitespace required before attribute definition");
2230 readAttDef(elementName);
2231 white = tryWhitespace();
2236 * Parse a single attribute definition.
2238 private void readAttDef(String elementName)
2239 throws IOException, XMLStreamException
2241 String name = readNmtoken(true);
2242 requireWhitespace();
2243 CPStringBuilder acc = new CPStringBuilder();
2244 HashSet values = new HashSet();
2245 String type = readAttType(acc, values);
2246 if (validating)
2248 if ("ID".equals(type))
2250 // VC: One ID per Element Type
2251 for (Iterator i = doctype.attlistIterator(elementName);
2252 i.hasNext(); )
2254 Map.Entry entry = (Map.Entry) i.next();
2255 AttributeDecl decl = (AttributeDecl) entry.getValue();
2256 if ("ID".equals(decl.type))
2257 error("element types must not have more than one ID " +
2258 "attribute");
2261 else if ("NOTATION".equals(type))
2263 // VC: One Notation Per Element Type
2264 for (Iterator i = doctype.attlistIterator(elementName);
2265 i.hasNext(); )
2267 Map.Entry entry = (Map.Entry) i.next();
2268 AttributeDecl decl = (AttributeDecl) entry.getValue();
2269 if ("NOTATION".equals(decl.type))
2270 error("element types must not have more than one NOTATION " +
2271 "attribute");
2273 // VC: No Notation on Empty Element
2274 ContentModel model = doctype.getElementModel(elementName);
2275 if (model != null && model.type == ContentModel.EMPTY)
2276 error("attributes of type NOTATION must not be declared on an " +
2277 "element declared EMPTY");
2280 String enumer = null;
2281 if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2282 enumer = acc.toString();
2283 else
2284 values = null;
2285 requireWhitespace();
2286 readDefault(elementName, name, type, enumer, values);
2290 * Parse an attribute type.
2292 private String readAttType(CPStringBuilder acc, HashSet values)
2293 throws IOException, XMLStreamException
2295 if (tryRead('('))
2297 readEnumeration(false, acc, values);
2298 return "ENUMERATION";
2300 else
2302 String typeString = readNmtoken(true);
2303 if ("NOTATION".equals(typeString))
2305 readNotationType(acc, values);
2306 return typeString;
2308 else if ("CDATA".equals(typeString) ||
2309 "ID".equals(typeString) ||
2310 "IDREF".equals(typeString) ||
2311 "IDREFS".equals(typeString) ||
2312 "ENTITY".equals(typeString) ||
2313 "ENTITIES".equals(typeString) ||
2314 "NMTOKEN".equals(typeString) ||
2315 "NMTOKENS".equals(typeString))
2316 return typeString;
2317 else
2319 error("illegal attribute type", typeString);
2320 return null;
2326 * Parse an enumeration.
2328 private void readEnumeration(boolean isNames, CPStringBuilder acc,
2329 HashSet values)
2330 throws IOException, XMLStreamException
2332 acc.append('(');
2333 // first token
2334 skipWhitespace();
2335 String token = readNmtoken(isNames);
2336 acc.append(token);
2337 values.add(token);
2338 // subsequent tokens
2339 skipWhitespace();
2340 while (!tryRead(')'))
2342 require('|');
2343 acc.append('|');
2344 skipWhitespace();
2345 token = readNmtoken(isNames);
2346 // VC: No Duplicate Tokens
2347 if (validating && values.contains(token))
2348 error("duplicate token", token);
2349 acc.append(token);
2350 values.add(token);
2351 skipWhitespace();
2353 acc.append(')');
2357 * Parse a notation type for an attribute.
2359 private void readNotationType(CPStringBuilder acc, HashSet values)
2360 throws IOException, XMLStreamException
2362 requireWhitespace();
2363 require('(');
2364 readEnumeration(true, acc, values);
2368 * Parse the default value for an attribute.
2370 private void readDefault(String elementName, String name,
2371 String type, String enumeration, HashSet values)
2372 throws IOException, XMLStreamException
2374 int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2375 int flags = LIT_ATTRIBUTE;
2376 String value = null, defaultType = null;
2377 boolean saved = expandPE;
2379 if (!"CDATA".equals(type))
2380 flags |= LIT_NORMALIZE;
2382 expandPE = false;
2383 if (tryRead('#'))
2385 if (tryRead("FIXED"))
2387 defaultType = "#FIXED";
2388 valueType = ATTRIBUTE_DEFAULT_FIXED;
2389 requireWhitespace();
2390 value = readLiteral(flags, false);
2392 else if (tryRead("REQUIRED"))
2394 defaultType = "#REQUIRED";
2395 valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2397 else if (tryRead("IMPLIED"))
2399 defaultType = "#IMPLIED";
2400 valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2402 else
2403 error("illegal keyword for attribute default value");
2405 else
2406 value = readLiteral(flags, false);
2407 expandPE = saved;
2408 if (validating)
2410 if ("ID".equals(type))
2412 // VC: Attribute Default Value Syntactically Correct
2413 if (value != null && !isNmtoken(value, true))
2414 error("default value must match Name production", value);
2415 // VC: ID Attribute Default
2416 if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2417 valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2418 error("ID attributes must have a declared default of " +
2419 "#IMPLIED or #REQUIRED");
2421 else if (value != null)
2423 // VC: Attribute Default Value Syntactically Correct
2424 if ("IDREF".equals(type) || "ENTITY".equals(type))
2426 if (!isNmtoken(value, true))
2427 error("default value must match Name production", value);
2429 else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2431 StringTokenizer st = new StringTokenizer(value);
2432 while (st.hasMoreTokens())
2434 String token = st.nextToken();
2435 if (!isNmtoken(token, true))
2436 error("default value must match Name production", token);
2439 else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2441 if (!isNmtoken(value, false))
2442 error("default value must match Nmtoken production", value);
2444 else if ("NMTOKENS".equals(type))
2446 StringTokenizer st = new StringTokenizer(value);
2447 while (st.hasMoreTokens())
2449 String token = st.nextToken();
2450 if (!isNmtoken(token, false))
2451 error("default value must match Nmtoken production",
2452 token);
2457 // Register attribute def
2458 AttributeDecl attribute =
2459 new AttributeDecl(type, value, valueType, enumeration, values,
2460 inputStack.size() != 1);
2461 doctype.addAttributeDecl(elementName, name, attribute);
2465 * Parse the EntityDecl production.
2467 private void readEntityDecl(boolean inExternalSubset)
2468 throws IOException, XMLStreamException
2470 int flags = 0;
2471 // Check if parameter entity
2472 boolean peFlag = false;
2473 expandPE = false;
2474 requireWhitespace();
2475 if (tryRead('%'))
2477 peFlag = true;
2478 requireWhitespace();
2480 expandPE = true;
2481 // Read entity name
2482 String name = readNmtoken(true);
2483 if (name.indexOf(':') != -1)
2484 error("illegal character ':' in entity name", name);
2485 if (peFlag)
2486 name = "%" + name;
2487 requireWhitespace();
2488 mark(1);
2489 int c = readCh();
2490 reset();
2491 if (c == 0x22 || c == 0x27) // " | '
2493 // Internal entity replacement text
2494 String value = readLiteral(flags | LIT_DISABLE_EREF, true);
2495 int ai = value.indexOf('&');
2496 while (ai != -1)
2498 int sci = value.indexOf(';', ai);
2499 if (sci == -1)
2500 error("malformed reference in entity value", value);
2501 String ref = value.substring(ai + 1, sci);
2502 int[] cp = UnicodeReader.toCodePointArray(ref);
2503 if (cp.length == 0)
2504 error("malformed reference in entity value", value);
2505 if (cp[0] == 0x23) // #
2507 if (cp.length == 1)
2508 error("malformed reference in entity value", value);
2509 if (cp[1] == 0x78) // 'x'
2511 if (cp.length == 2)
2512 error("malformed reference in entity value", value);
2513 for (int i = 2; i < cp.length; i++)
2515 int x = cp[i];
2516 if (x < 0x30 ||
2517 (x > 0x39 && x < 0x41) ||
2518 (x > 0x46 && x < 0x61) ||
2519 x > 0x66)
2520 error("malformed character reference in entity value",
2521 value);
2524 else
2526 for (int i = 1; i < cp.length; i++)
2528 int x = cp[i];
2529 if (x < 0x30 || x > 0x39)
2530 error("malformed character reference in entity value",
2531 value);
2535 else
2537 if (!isNameStartCharacter(cp[0], input.xml11))
2538 error("malformed reference in entity value", value);
2539 for (int i = 1; i < cp.length; i++)
2541 if (!isNameCharacter(cp[i], input.xml11))
2542 error("malformed reference in entity value", value);
2545 ai = value.indexOf('&', sci);
2547 doctype.addEntityDecl(name, value, inExternalSubset);
2549 else
2551 ExternalIds ids = readExternalIds(false, false);
2552 // Check for NDATA
2553 boolean white = tryWhitespace();
2554 if (!peFlag && tryRead("NDATA"))
2556 if (!white)
2557 error("whitespace required before NDATA");
2558 requireWhitespace();
2559 ids.notationName = readNmtoken(true);
2561 doctype.addEntityDecl(name, ids, inExternalSubset);
2563 // finish
2564 skipWhitespace();
2565 require('>');
2569 * Parse the NotationDecl production.
2571 private void readNotationDecl(boolean inExternalSubset)
2572 throws IOException, XMLStreamException
2574 requireWhitespace();
2575 String notationName = readNmtoken(true);
2576 if (notationName.indexOf(':') != -1)
2577 error("illegal character ':' in notation name", notationName);
2578 if (validating)
2580 // VC: Unique Notation Name
2581 ExternalIds notation = doctype.getNotation(notationName);
2582 if (notation != null)
2583 error("duplicate notation name", notationName);
2585 requireWhitespace();
2586 ExternalIds ids = readExternalIds(true, false);
2587 ids.notationName = notationName;
2588 doctype.addNotationDecl(notationName, ids, inExternalSubset);
2589 skipWhitespace();
2590 require('>');
2594 * Returns a tuple {publicId, systemId}.
2596 private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2597 throws IOException, XMLStreamException
2599 int c;
2600 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2601 ExternalIds ids = new ExternalIds();
2603 if (tryRead("PUBLIC"))
2605 requireWhitespace();
2606 ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2607 if (inNotation)
2609 skipWhitespace();
2610 mark(1);
2611 c = readCh();
2612 reset();
2613 if (c == 0x22 || c == 0x27) // " | '
2615 String href = readLiteral(flags, false);
2616 ids.systemId = absolutize(input.systemId, href);
2619 else
2621 requireWhitespace();
2622 String href = readLiteral(flags, false);
2623 ids.systemId = absolutize(input.systemId, href);
2625 // Check valid URI characters
2626 for (int i = 0; i < ids.publicId.length(); i++)
2628 char d = ids.publicId.charAt(i);
2629 if (d >= 'a' && d <= 'z')
2630 continue;
2631 if (d >= 'A' && d <= 'Z')
2632 continue;
2633 if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2634 continue;
2635 error("illegal PUBLIC id character",
2636 "U+" + Integer.toHexString(d));
2639 else if (tryRead("SYSTEM"))
2641 requireWhitespace();
2642 String href = readLiteral(flags, false);
2643 ids.systemId = absolutize(input.systemId, href);
2645 else if (!isSubset)
2647 error("missing SYSTEM or PUBLIC keyword");
2649 if (ids.systemId != null && !inNotation)
2651 if (ids.systemId.indexOf('#') != -1)
2652 error("SYSTEM id has a URI fragment", ids.systemId);
2654 return ids;
2658 * Parse the start of an element.
2659 * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
2661 private int readStartElement()
2662 throws IOException, XMLStreamException
2664 // Read element name
2665 String elementName = readNmtoken(true);
2666 attrs.clear();
2667 // Push namespace context
2668 if (namespaceAware)
2670 if (elementName.charAt(0) == ':' ||
2671 elementName.charAt(elementName.length() - 1) == ':')
2672 error("not a QName", elementName);
2673 namespaces.addFirst(new LinkedHashMap());
2675 // Read element content
2676 boolean white = tryWhitespace();
2677 mark(1);
2678 int c = readCh();
2679 while (c != 0x2f && c != 0x3e) // '/' | '>'
2681 // Read attribute
2682 reset();
2683 if (!white)
2684 error("need whitespace between attributes");
2685 readAttribute(elementName);
2686 white = tryWhitespace();
2687 mark(1);
2688 c = readCh();
2690 // supply defaulted attributes
2691 if (doctype != null)
2693 for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2695 Map.Entry entry = (Map.Entry) i.next();
2696 String attName = (String) entry.getKey();
2697 AttributeDecl decl = (AttributeDecl) entry.getValue();
2698 if (validating)
2700 switch (decl.valueType)
2702 case ATTRIBUTE_DEFAULT_REQUIRED:
2703 // VC: Required Attribute
2704 if (decl.value == null && !attributeSpecified(attName))
2705 error("value for " + attName + " attribute is required");
2706 break;
2707 case ATTRIBUTE_DEFAULT_FIXED:
2708 // VC: Fixed Attribute Default
2709 for (Iterator j = attrs.iterator(); j.hasNext(); )
2711 Attribute a = (Attribute) j.next();
2712 if (attName.equals(a.name) &&
2713 !decl.value.equals(a.value))
2714 error("value for " + attName + " attribute must be " +
2715 decl.value);
2717 break;
2720 if (namespaceAware && attName.equals("xmlns"))
2722 LinkedHashMap ctx =
2723 (LinkedHashMap) namespaces.getFirst();
2724 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2725 continue; // namespace was specified
2727 else if (namespaceAware && attName.startsWith("xmlns:"))
2729 LinkedHashMap ctx =
2730 (LinkedHashMap) namespaces.getFirst();
2731 if (ctx.containsKey(attName.substring(6)))
2732 continue; // namespace was specified
2734 else if (attributeSpecified(attName))
2735 continue;
2736 if (decl.value == null)
2737 continue;
2738 // VC: Standalone Document Declaration
2739 if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2740 error("standalone must be 'no' if attributes inherit values " +
2741 "from externally declared markup declarations");
2742 Attribute attr =
2743 new Attribute(attName, decl.type, false, decl.value);
2744 if (namespaceAware)
2746 if (!addNamespace(attr))
2747 attrs.add(attr);
2749 else
2750 attrs.add(attr);
2753 if (baseAware)
2755 String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2756 String base = getXMLBase();
2757 bases.addFirst(absolutize(base, uri));
2759 if (namespaceAware)
2761 // check prefix bindings
2762 int ci = elementName.indexOf(':');
2763 if (ci != -1)
2765 String prefix = elementName.substring(0, ci);
2766 String uri = getNamespaceURI(prefix);
2767 if (uri == null)
2768 error("unbound element prefix", prefix);
2769 else if (input.xml11 && "".equals(uri))
2770 error("XML 1.1 unbound element prefix", prefix);
2772 for (Iterator i = attrs.iterator(); i.hasNext(); )
2774 Attribute attr = (Attribute) i.next();
2775 if (attr.prefix != null &&
2776 !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2778 String uri = getNamespaceURI(attr.prefix);
2779 if (uri == null)
2780 error("unbound attribute prefix", attr.prefix);
2781 else if (input.xml11 && "".equals(uri))
2782 error("XML 1.1 unbound attribute prefix", attr.prefix);
2786 if (validating && doctype != null)
2788 validateStartElement(elementName);
2789 currentContentModel = doctype.getElementModel(elementName);
2790 if (currentContentModel == null)
2791 error("no element declaration", elementName);
2792 validationStack.add(new LinkedList());
2794 // make element name available for read
2795 buf.setLength(0);
2796 buf.append(elementName);
2797 // push element onto stack
2798 stack.addLast(elementName);
2799 switch (c)
2801 case 0x3e: // '>'
2802 return CONTENT;
2803 case 0x2f: // '/'
2804 require('>');
2805 return EMPTY_ELEMENT;
2807 return -1; // to satisfy compiler
2811 * Indicates whether the specified attribute name was specified for the
2812 * current element.
2814 private boolean attributeSpecified(String attName)
2816 for (Iterator j = attrs.iterator(); j.hasNext(); )
2818 Attribute a = (Attribute) j.next();
2819 if (attName.equals(a.name))
2820 return true;
2822 return false;
2826 * Parse an attribute.
2828 private void readAttribute(String elementName)
2829 throws IOException, XMLStreamException
2831 // Read attribute name
2832 String attributeName = readNmtoken(true);
2833 String type = getAttributeType(elementName, attributeName);
2834 readEq();
2835 // Read literal
2836 final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
2837 String value = (type == null || "CDATA".equals(type)) ?
2838 readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2839 // add attribute event
2840 Attribute attr = this.new Attribute(attributeName, type, true, value);
2841 if (namespaceAware)
2843 if (attributeName.charAt(0) == ':' ||
2844 attributeName.charAt(attributeName.length() - 1) == ':')
2845 error("not a QName", attributeName);
2846 else if (attributeName.equals("xmlns"))
2848 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2849 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2850 error("duplicate default namespace");
2852 else if (attributeName.startsWith("xmlns:"))
2854 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2855 if (ctx.containsKey(attributeName.substring(6)))
2856 error("duplicate namespace", attributeName.substring(6));
2858 else if (attrs.contains(attr))
2859 error("duplicate attribute", attributeName);
2861 else if (attrs.contains(attr))
2862 error("duplicate attribute", attributeName);
2863 if (validating && doctype != null)
2865 // VC: Attribute Value Type
2866 AttributeDecl decl =
2867 doctype.getAttributeDecl(elementName, attributeName);
2868 if (decl == null)
2869 error("attribute must be declared", attributeName);
2870 if ("ENUMERATION".equals(decl.type))
2872 // VC: Enumeration
2873 if (!decl.values.contains(value))
2874 error("value does not match enumeration " + decl.enumeration,
2875 value);
2877 else if ("ID".equals(decl.type))
2879 // VC: ID
2880 if (!isNmtoken(value, true))
2881 error("ID values must match the Name production");
2882 if (ids.contains(value))
2883 error("Duplicate ID", value);
2884 ids.add(value);
2886 else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2888 StringTokenizer st = new StringTokenizer(value);
2889 while (st.hasMoreTokens())
2891 String token = st.nextToken();
2892 // VC: IDREF
2893 if (!isNmtoken(token, true))
2894 error("IDREF values must match the Name production");
2895 idrefs.add(token);
2898 else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2900 StringTokenizer st = new StringTokenizer(value);
2901 while (st.hasMoreTokens())
2903 String token = st.nextToken();
2904 // VC: Name Token
2905 if (!isNmtoken(token, false))
2906 error("NMTOKEN values must match the Nmtoken production");
2909 else if ("ENTITY".equals(decl.type))
2911 // VC: Entity Name
2912 if (!isNmtoken(value, true))
2913 error("ENTITY values must match the Name production");
2914 Object entity = doctype.getEntity(value);
2915 if (entity == null || !(entity instanceof ExternalIds) ||
2916 ((ExternalIds) entity).notationName == null)
2917 error("ENTITY values must match the name of an unparsed " +
2918 "entity declared in the DTD");
2920 else if ("NOTATION".equals(decl.type))
2922 if (!decl.values.contains(value))
2923 error("NOTATION values must match a declared notation name",
2924 value);
2925 // VC: Notation Attributes
2926 ExternalIds notation = doctype.getNotation(value);
2927 if (notation == null)
2928 error("NOTATION values must match the name of a notation " +
2929 "declared in the DTD", value);
2932 if (namespaceAware)
2934 if (!addNamespace(attr))
2935 attrs.add(attr);
2937 else
2938 attrs.add(attr);
2942 * Determines whether the specified attribute is a namespace declaration,
2943 * and adds it to the current namespace context if so. Returns false if
2944 * the attribute is an ordinary attribute.
2946 private boolean addNamespace(Attribute attr)
2947 throws XMLStreamException
2949 if ("xmlns".equals(attr.name))
2951 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2952 if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2953 error("Duplicate default namespace declaration");
2954 if (XMLConstants.XML_NS_URI.equals(attr.value))
2955 error("can't bind XML namespace");
2956 ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2957 return true;
2959 else if ("xmlns".equals(attr.prefix))
2961 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2962 if (ctx.get(attr.localName) != null)
2963 error("Duplicate namespace declaration for prefix",
2964 attr.localName);
2965 if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2967 if (!XMLConstants.XML_NS_URI.equals(attr.value))
2968 error("can't redeclare xml prefix");
2969 else
2970 return false; // treat as attribute
2972 if (XMLConstants.XML_NS_URI.equals(attr.value))
2973 error("can't bind non-xml prefix to XML namespace");
2974 if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2975 error("can't redeclare xmlns prefix");
2976 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2977 error("can't bind non-xmlns prefix to XML Namespace namespace");
2978 if ("".equals(attr.value) && !input.xml11)
2979 error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2980 ctx.put(attr.localName, attr.value);
2981 return true;
2983 return false;
2987 * Parse a closing tag.
2989 private void readEndElement()
2990 throws IOException, XMLStreamException
2992 // pop element off stack
2993 String expected = (String) stack.removeLast();
2994 require(expected);
2995 skipWhitespace();
2996 require('>');
2997 // Make element name available
2998 buf.setLength(0);
2999 buf.append(expected);
3000 if (validating && doctype != null)
3001 endElementValidationHook();
3005 * Validate the end of an element.
3006 * Called on an end-element or empty element if validating.
3008 private void endElementValidationHook()
3009 throws XMLStreamException
3011 validateEndElement();
3012 validationStack.removeLast();
3013 if (stack.isEmpty())
3014 currentContentModel = null;
3015 else
3017 String parent = (String) stack.getLast();
3018 currentContentModel = doctype.getElementModel(parent);
3023 * Parse a comment.
3025 private void readComment(boolean inDTD)
3026 throws IOException, XMLStreamException
3028 boolean saved = expandPE;
3029 expandPE = false;
3030 buf.setLength(0);
3031 readUntil(TEST_END_COMMENT);
3032 require('>');
3033 expandPE = saved;
3034 if (inDTD)
3035 doctype.addComment(buf.toString());
3039 * Parse a processing instruction.
3041 private void readPI(boolean inDTD)
3042 throws IOException, XMLStreamException
3044 boolean saved = expandPE;
3045 expandPE = false;
3046 piTarget = readNmtoken(true);
3047 if (piTarget.indexOf(':') != -1)
3048 error("illegal character in PI target", new Character(':'));
3049 if ("xml".equalsIgnoreCase(piTarget))
3050 error("illegal PI target", piTarget);
3051 if (tryRead(TEST_END_PI))
3052 piData = null;
3053 else
3055 if (!tryWhitespace())
3056 error("whitespace required between PI target and data");
3057 buf.setLength(0);
3058 readUntil(TEST_END_PI);
3059 piData = buf.toString();
3061 expandPE = saved;
3062 if (inDTD)
3063 doctype.addPI(piTarget, piData);
3067 * Parse an entity reference.
3069 private void readReference()
3070 throws IOException, XMLStreamException
3072 buf.setLength(0);
3073 String entityName = readNmtoken(true);
3074 require(';');
3075 buf.setLength(0);
3076 buf.append(entityName);
3080 * Read an CDATA section.
3082 private void readCDSect()
3083 throws IOException, XMLStreamException
3085 buf.setLength(0);
3086 readUntil(TEST_END_CDATA);
3090 * Read character data.
3091 * @return the type of text read (CHARACTERS or SPACE)
3093 private int readCharData(String prefix)
3094 throws IOException, XMLStreamException
3096 boolean white = true;
3097 buf.setLength(0);
3098 if (prefix != null)
3099 buf.append(prefix);
3100 boolean done = false;
3101 boolean entities = false;
3102 while (!done)
3104 // Block read
3105 mark(tmpBuf.length);
3106 int len = read(tmpBuf, 0, tmpBuf.length);
3107 if (len == -1)
3109 if (inputStack.size() > 1)
3111 popInput();
3112 // report end-entity
3113 done = true;
3115 else
3116 throw new EOFException();
3118 for (int i = 0; i < len && !done; i++)
3120 int c = tmpBuf[i];
3121 switch (c)
3123 case 0x20:
3124 case 0x09:
3125 case 0x0a:
3126 case 0x0d:
3127 buf.append(Character.toChars(c));
3128 break; // whitespace
3129 case 0x26: // '&'
3130 reset();
3131 read(tmpBuf, 0, i);
3132 // character reference?
3133 mark(3);
3134 c = readCh(); // &
3135 c = readCh();
3136 if (c == 0x23) // '#'
3138 mark(1);
3139 c = readCh();
3140 boolean hex = (c == 0x78); // 'x'
3141 if (!hex)
3142 reset();
3143 char[] ch = readCharacterRef(hex ? 16 : 10);
3144 buf.append(ch, 0, ch.length);
3145 for (int j = 0; j < ch.length; j++)
3147 switch (ch[j])
3149 case 0x20:
3150 case 0x09:
3151 case 0x0a:
3152 case 0x0d:
3153 break; // whitespace
3154 default:
3155 white = false;
3159 else
3161 // entity reference
3162 reset();
3163 c = readCh(); // &
3164 String entityName = readNmtoken(true);
3165 require(';');
3166 String text =
3167 (String) PREDEFINED_ENTITIES.get(entityName);
3168 if (text != null)
3169 buf.append(text);
3170 else
3172 pushInput("", "&" + entityName + ";", false, false);
3173 done = true;
3174 break;
3177 // continue processing
3178 i = -1;
3179 mark(tmpBuf.length);
3180 len = read(tmpBuf, 0, tmpBuf.length);
3181 if (len == -1)
3183 if (inputStack.size() > 1)
3185 popInput();
3186 done = true;
3188 else
3189 throw new EOFException();
3191 entities = true;
3192 break; // end of text sequence
3193 case 0x3e: // '>'
3194 int l = buf.length();
3195 if (l > 1 &&
3196 buf.charAt(l - 1) == ']' &&
3197 buf.charAt(l - 2) == ']')
3198 error("Character data may not contain unescaped ']]>'");
3199 buf.append(Character.toChars(c));
3200 break;
3201 case 0x3c: // '<'
3202 reset();
3203 // read i characters
3204 int count = 0, remaining = i;
3207 int r = read(tmpBuf, 0, remaining);
3208 count += r;
3209 remaining -= r;
3211 while (count < i);
3212 i = len;
3213 if (coalescing && tryRead(TEST_CDATA))
3214 readUntil(TEST_END_CDATA); // read CDATA section into buf
3215 else
3216 done = true; // end of text sequence
3217 break;
3218 default:
3219 if (input.xml11)
3221 if (!isXML11Char(c) || isXML11RestrictedChar(c))
3222 error("illegal XML 1.1 character",
3223 "U+" + Integer.toHexString(c));
3225 else if (!isChar(c))
3226 error("illegal XML character",
3227 "U+" + Integer.toHexString(c));
3228 white = false;
3229 buf.append(Character.toChars(c));
3232 // if text buffer >= 2MB, return it as a chunk
3233 // to avoid excessive memory use
3234 if (buf.length() >= 2097152)
3235 done = true;
3237 if (entities)
3238 normalizeCRLF(buf);
3239 return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3243 * Expands the specified entity.
3245 private void expandEntity(String name, boolean inAttr, boolean normalize)
3246 throws IOException, XMLStreamException
3248 if (doctype != null)
3250 Object value = doctype.getEntity(name);
3251 if (value != null)
3253 if (xmlStandalone == Boolean.TRUE)
3255 // VC: Standalone Document Declaration
3256 if (doctype.isEntityExternal(name))
3257 error("reference to external entity in standalone document");
3258 else if (value instanceof ExternalIds)
3260 ExternalIds ids = (ExternalIds) value;
3261 if (ids.notationName != null &&
3262 doctype.isNotationExternal(ids.notationName))
3263 error("reference to external notation in " +
3264 "standalone document");
3267 if (value instanceof String)
3269 String text = (String) value;
3270 if (inAttr && text.indexOf('<') != -1)
3271 error("< in attribute value");
3272 pushInput(name, text, !inAttr, normalize);
3274 else if (inAttr)
3275 error("reference to external entity in attribute value", name);
3276 else
3277 pushInput(name, (ExternalIds) value, !inAttr, normalize);
3278 return;
3281 error("reference to undeclared entity", name);
3285 * Indicates whether the specified entity is unparsed.
3287 private boolean isUnparsedEntity(String name)
3289 if (doctype != null)
3291 Object value = doctype.getEntity(name);
3292 if (value != null && value instanceof ExternalIds)
3293 return ((ExternalIds) value).notationName != null;
3295 return false;
3299 * Read an equals sign.
3301 private void readEq()
3302 throws IOException, XMLStreamException
3304 skipWhitespace();
3305 require('=');
3306 skipWhitespace();
3310 * Character read for reading literals.
3311 * @param recognizePEs whether to recognize parameter-entity references
3313 private int literalReadCh(boolean recognizePEs)
3314 throws IOException, XMLStreamException
3316 int c = recognizePEs ? readCh() : read();
3317 while (c == -1)
3319 if (inputStack.size() > 1)
3321 inputStack.removeLast();
3322 input = (Input) inputStack.getLast();
3323 // Don't issue end-entity
3324 c = recognizePEs ? readCh() : read();
3326 else
3327 throw new EOFException();
3329 return c;
3333 * Read a string literal.
3335 private String readLiteral(int flags, boolean recognizePEs)
3336 throws IOException, XMLStreamException
3338 boolean saved = expandPE;
3339 int delim = readCh();
3340 if (delim != 0x27 && delim != 0x22)
3341 error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
3342 literalBuf.setLength(0);
3343 if ((flags & LIT_DISABLE_PE) != 0)
3344 expandPE = false;
3345 boolean entities = false;
3346 int inputStackSize = inputStack.size();
3349 int c = literalReadCh(recognizePEs);
3350 if (c == delim && inputStackSize == inputStack.size())
3351 break;
3352 switch (c)
3354 case 0x0a:
3355 case 0x0d:
3356 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
3357 c = 0x20; // normalize to space
3358 break;
3359 case 0x09:
3360 if ((flags & LIT_ATTRIBUTE) != 0)
3361 c = 0x20; // normalize to space
3362 break;
3363 case 0x26: // '&'
3364 mark(2);
3365 c = readCh();
3366 if (c == 0x23) // '#'
3368 if ((flags & LIT_DISABLE_CREF) != 0)
3370 reset();
3371 c = 0x26; // '&'
3373 else
3375 mark(1);
3376 c = readCh();
3377 boolean hex = (c == 0x78); // 'x'
3378 if (!hex)
3379 reset();
3380 char[] ref = readCharacterRef(hex ? 16 : 10);
3381 for (int i = 0; i < ref.length; i++)
3382 literalBuf.append(ref[i]);
3383 entities = true;
3384 continue;
3387 else
3389 if ((flags & LIT_DISABLE_EREF) != 0)
3391 reset();
3392 c = 0x26; // '&'
3394 else
3396 reset();
3397 String entityName = readNmtoken(true);
3398 require(';');
3399 String text =
3400 (String) PREDEFINED_ENTITIES.get(entityName);
3401 if (text != null)
3402 literalBuf.append(text);
3403 else
3404 expandEntity(entityName,
3405 (flags & LIT_ATTRIBUTE) != 0,
3406 true);
3407 entities = true;
3408 continue;
3411 break;
3412 case 0x3c: // '<'
3413 if ((flags & LIT_ATTRIBUTE) != 0)
3414 error("attribute values may not contain '<'");
3415 break;
3416 case -1:
3417 if (inputStack.size() > 1)
3419 popInput();
3420 continue;
3422 throw new EOFException();
3423 default:
3424 if ((c < 0x0020 || c > 0xfffd) ||
3425 (c >= 0xd800 && c < 0xdc00) ||
3426 (input.xml11 && (c >= 0x007f) &&
3427 (c <= 0x009f) && (c != 0x0085)))
3428 error("illegal character", "U+" + Integer.toHexString(c));
3430 literalBuf.append(Character.toChars(c));
3432 while (true);
3433 expandPE = saved;
3434 if (entities)
3435 normalizeCRLF(literalBuf);
3436 if ((flags & LIT_NORMALIZE) > 0)
3437 literalBuf = normalize(literalBuf);
3438 return literalBuf.toString();
3442 * Performs attribute-value normalization of the text buffer.
3443 * This discards leading and trailing whitespace, and replaces sequences
3444 * of whitespace with a single space.
3446 private StringBuffer normalize(StringBuffer buf)
3448 StringBuffer acc = new StringBuffer();
3449 int len = buf.length();
3450 int avState = 0;
3451 for (int i = 0; i < len; i++)
3453 char c = buf.charAt(i);
3454 if (c == ' ')
3455 avState = (avState == 0) ? 0 : 1;
3456 else
3458 if (avState == 1)
3459 acc.append(' ');
3460 acc.append(c);
3461 avState = 2;
3464 return acc;
3468 * Replace any CR/LF pairs in the buffer with LF.
3469 * This may be necessary if combinations of CR or LF were declared as
3470 * (character) entity references in the input.
3472 private void normalizeCRLF(StringBuffer buf)
3474 int len = buf.length() - 1;
3475 for (int i = 0; i < len; i++)
3477 char c = buf.charAt(i);
3478 if (c == '\r' && buf.charAt(i + 1) == '\n')
3480 buf.deleteCharAt(i--);
3481 len--;
3487 * Parse and expand a parameter entity reference.
3489 private void expandPEReference()
3490 throws IOException, XMLStreamException
3492 String name = readNmtoken(true, new StringBuffer());
3493 require(';');
3494 mark(1); // ensure we don't reset to before the semicolon
3495 if (doctype != null)
3497 String entityName = "%" + name;
3498 Object entity = doctype.getEntity(entityName);
3499 if (entity != null)
3501 if (xmlStandalone == Boolean.TRUE)
3503 if (doctype.isEntityExternal(entityName))
3504 error("reference to external parameter entity in " +
3505 "standalone document");
3507 if (entity instanceof String)
3509 pushInput(name, (String) entity, false, input.normalize);
3510 //pushInput(name, " " + (String) entity + " ");
3512 else
3514 //pushInput("", " ");
3515 pushInput(name, (ExternalIds) entity, false, input.normalize);
3516 //pushInput("", " ");
3519 else
3520 error("reference to undeclared parameter entity", name);
3522 else
3523 error("reference to parameter entity without doctype", name);
3527 * Parse the digits in a character reference.
3528 * @param base the base of the digits (10 or 16)
3530 private char[] readCharacterRef(int base)
3531 throws IOException, XMLStreamException
3533 CPStringBuilder b = new CPStringBuilder();
3534 for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3535 b.append(Character.toChars(c));
3538 int ord = Integer.parseInt(b.toString(), base);
3539 if (input.xml11)
3541 if (!isXML11Char(ord))
3542 error("illegal XML 1.1 character reference " +
3543 "U+" + Integer.toHexString(ord));
3545 else
3547 if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3548 || (ord >= 0xd800 && ord <= 0xdfff)
3549 || ord == 0xfffe || ord == 0xffff
3550 || ord > 0x0010ffff)
3551 error("illegal XML character reference " +
3552 "U+" + Integer.toHexString(ord));
3554 return Character.toChars(ord);
3556 catch (NumberFormatException e)
3558 error("illegal characters in character reference", b.toString());
3559 return null;
3564 * Parses an NMTOKEN or Name production.
3565 * @param isName if a Name, otherwise an NMTOKEN
3567 private String readNmtoken(boolean isName)
3568 throws IOException, XMLStreamException
3570 return readNmtoken(isName, nmtokenBuf);
3574 * Parses an NMTOKEN or Name production using the specified buffer.
3575 * @param isName if a Name, otherwise an NMTOKEN
3576 * @param buf the character buffer to use
3578 private String readNmtoken(boolean isName, StringBuffer buf)
3579 throws IOException, XMLStreamException
3581 buf.setLength(0);
3582 int c = readCh();
3583 if (isName)
3585 if (!isNameStartCharacter(c, input.xml11))
3586 error("not a name start character",
3587 "U+" + Integer.toHexString(c));
3589 else
3591 if (!isNameCharacter(c, input.xml11))
3592 error("not a name character",
3593 "U+" + Integer.toHexString(c));
3595 buf.append(Character.toChars(c));
3598 mark(1);
3599 c = readCh();
3600 switch (c)
3602 case 0x25: // '%'
3603 case 0x3c: // '<'
3604 case 0x3e: // '>'
3605 case 0x26: // '&'
3606 case 0x2c: // ','
3607 case 0x7c: // '|'
3608 case 0x2a: // '*'
3609 case 0x2b: // '+'
3610 case 0x3f: // '?'
3611 case 0x29: // ')'
3612 case 0x3d: // '='
3613 case 0x27: // '\''
3614 case 0x22: // '"'
3615 case 0x5b: // '['
3616 case 0x20: // ' '
3617 case 0x09: // '\t'
3618 case 0x0a: // '\n'
3619 case 0x0d: // '\r'
3620 case 0x3b: // ';'
3621 case 0x2f: // '/'
3622 case -1:
3623 reset();
3624 return intern(buf.toString());
3625 default:
3626 if (!isNameCharacter(c, input.xml11))
3627 error("not a name character",
3628 "U+" + Integer.toHexString(c));
3629 else
3630 buf.append(Character.toChars(c));
3633 while (true);
3637 * Indicates whether the specified Unicode character is an XML 1.1 Char.
3639 public static boolean isXML11Char(int c)
3641 return ((c >= 0x0001 && c <= 0xD7FF) ||
3642 (c >= 0xE000 && c < 0xFFFE) ||
3643 (c >= 0x10000 && c <= 0x10FFFF));
3647 * Indicates whether the specified Unicode character is an XML 1.1
3648 * RestrictedChar.
3650 public static boolean isXML11RestrictedChar(int c)
3652 return ((c >= 0x0001 && c <= 0x0008) ||
3653 (c >= 0x000B && c <= 0x000C) ||
3654 (c >= 0x000E && c <= 0x001F) ||
3655 (c >= 0x007F && c <= 0x0084) ||
3656 (c >= 0x0086 && c <= 0x009F));
3660 * Indicates whether the specified text matches the Name or Nmtoken
3661 * production.
3663 private boolean isNmtoken(String text, boolean isName)
3667 int[] cp = UnicodeReader.toCodePointArray(text);
3668 if (cp.length == 0)
3669 return false;
3670 if (isName)
3672 if (!isNameStartCharacter(cp[0], input.xml11))
3673 return false;
3675 else
3677 if (!isNameCharacter(cp[0], input.xml11))
3678 return false;
3680 for (int i = 1; i < cp.length; i++)
3682 if (!isNameCharacter(cp[i], input.xml11))
3683 return false;
3685 return true;
3687 catch (IOException e)
3689 return false;
3694 * Indicates whether the specified Unicode character is a Name start
3695 * character.
3697 public static boolean isNameStartCharacter(int c, boolean xml11)
3699 if (xml11)
3700 return ((c >= 0x0041 && c <= 0x005a) ||
3701 (c >= 0x0061 && c <= 0x007a) ||
3702 c == 0x3a |
3703 c == 0x5f |
3704 (c >= 0xC0 && c <= 0xD6) ||
3705 (c >= 0xD8 && c <= 0xF6) ||
3706 (c >= 0xF8 && c <= 0x2FF) ||
3707 (c >= 0x370 && c <= 0x37D) ||
3708 (c >= 0x37F && c <= 0x1FFF) ||
3709 (c >= 0x200C && c <= 0x200D) ||
3710 (c >= 0x2070 && c <= 0x218F) ||
3711 (c >= 0x2C00 && c <= 0x2FEF) ||
3712 (c >= 0x3001 && c <= 0xD7FF) ||
3713 (c >= 0xF900 && c <= 0xFDCF) ||
3714 (c >= 0xFDF0 && c <= 0xFFFD) ||
3715 (c >= 0x10000 && c <= 0xEFFFF));
3716 else
3717 return (c == 0x5f || c == 0x3a || isLetter(c));
3721 * Indicates whether the specified Unicode character is a Name non-initial
3722 * character.
3724 public static boolean isNameCharacter(int c, boolean xml11)
3726 if (xml11)
3727 return ((c >= 0x0041 && c <= 0x005a) ||
3728 (c >= 0x0061 && c <= 0x007a) ||
3729 (c >= 0x0030 && c <= 0x0039) ||
3730 c == 0x3a |
3731 c == 0x5f |
3732 c == 0x2d |
3733 c == 0x2e |
3734 c == 0xB7 |
3735 (c >= 0xC0 && c <= 0xD6) ||
3736 (c >= 0xD8 && c <= 0xF6) ||
3737 (c >= 0xF8 && c <= 0x2FF) ||
3738 (c >= 0x300 && c <= 0x37D) ||
3739 (c >= 0x37F && c <= 0x1FFF) ||
3740 (c >= 0x200C && c <= 0x200D) ||
3741 (c >= 0x203F && c <= 0x2040) ||
3742 (c >= 0x2070 && c <= 0x218F) ||
3743 (c >= 0x2C00 && c <= 0x2FEF) ||
3744 (c >= 0x3001 && c <= 0xD7FF) ||
3745 (c >= 0xF900 && c <= 0xFDCF) ||
3746 (c >= 0xFDF0 && c <= 0xFFFD) ||
3747 (c >= 0x10000 && c <= 0xEFFFF));
3748 else
3749 return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3750 isLetter(c) || isDigit(c) ||
3751 isCombiningChar(c) || isExtender(c));
3755 * Indicates whether the specified Unicode character matches the Letter
3756 * production.
3758 public static boolean isLetter(int c)
3760 if ((c >= 0x0041 && c <= 0x005A) ||
3761 (c >= 0x0061 && c <= 0x007A) ||
3762 (c >= 0x00C0 && c <= 0x00D6) ||
3763 (c >= 0x00D8 && c <= 0x00F6) ||
3764 (c >= 0x00F8 && c <= 0x00FF) ||
3765 (c >= 0x0100 && c <= 0x0131) ||
3766 (c >= 0x0134 && c <= 0x013E) ||
3767 (c >= 0x0141 && c <= 0x0148) ||
3768 (c >= 0x014A && c <= 0x017E) ||
3769 (c >= 0x0180 && c <= 0x01C3) ||
3770 (c >= 0x01CD && c <= 0x01F0) ||
3771 (c >= 0x01F4 && c <= 0x01F5) ||
3772 (c >= 0x01FA && c <= 0x0217) ||
3773 (c >= 0x0250 && c <= 0x02A8) ||
3774 (c >= 0x02BB && c <= 0x02C1) ||
3775 c == 0x0386 ||
3776 (c >= 0x0388 && c <= 0x038A) ||
3777 c == 0x038C ||
3778 (c >= 0x038E && c <= 0x03A1) ||
3779 (c >= 0x03A3 && c <= 0x03CE) ||
3780 (c >= 0x03D0 && c <= 0x03D6) ||
3781 c == 0x03DA ||
3782 c == 0x03DC ||
3783 c == 0x03DE ||
3784 c == 0x03E0 ||
3785 (c >= 0x03E2 && c <= 0x03F3) ||
3786 (c >= 0x0401 && c <= 0x040C) ||
3787 (c >= 0x040E && c <= 0x044F) ||
3788 (c >= 0x0451 && c <= 0x045C) ||
3789 (c >= 0x045E && c <= 0x0481) ||
3790 (c >= 0x0490 && c <= 0x04C4) ||
3791 (c >= 0x04C7 && c <= 0x04C8) ||
3792 (c >= 0x04CB && c <= 0x04CC) ||
3793 (c >= 0x04D0 && c <= 0x04EB) ||
3794 (c >= 0x04EE && c <= 0x04F5) ||
3795 (c >= 0x04F8 && c <= 0x04F9) ||
3796 (c >= 0x0531 && c <= 0x0556) ||
3797 c == 0x0559 ||
3798 (c >= 0x0561 && c <= 0x0586) ||
3799 (c >= 0x05D0 && c <= 0x05EA) ||
3800 (c >= 0x05F0 && c <= 0x05F2) ||
3801 (c >= 0x0621 && c <= 0x063A) ||
3802 (c >= 0x0641 && c <= 0x064A) ||
3803 (c >= 0x0671 && c <= 0x06B7) ||
3804 (c >= 0x06BA && c <= 0x06BE) ||
3805 (c >= 0x06C0 && c <= 0x06CE) ||
3806 (c >= 0x06D0 && c <= 0x06D3) ||
3807 c == 0x06D5 ||
3808 (c >= 0x06E5 && c <= 0x06E6) ||
3809 (c >= 0x0905 && c <= 0x0939) ||
3810 c == 0x093D ||
3811 (c >= 0x0958 && c <= 0x0961) ||
3812 (c >= 0x0985 && c <= 0x098C) ||
3813 (c >= 0x098F && c <= 0x0990) ||
3814 (c >= 0x0993 && c <= 0x09A8) ||
3815 (c >= 0x09AA && c <= 0x09B0) ||
3816 c == 0x09B2 ||
3817 (c >= 0x09B6 && c <= 0x09B9) ||
3818 (c >= 0x09DC && c <= 0x09DD) ||
3819 (c >= 0x09DF && c <= 0x09E1) ||
3820 (c >= 0x09F0 && c <= 0x09F1) ||
3821 (c >= 0x0A05 && c <= 0x0A0A) ||
3822 (c >= 0x0A0F && c <= 0x0A10) ||
3823 (c >= 0x0A13 && c <= 0x0A28) ||
3824 (c >= 0x0A2A && c <= 0x0A30) ||
3825 (c >= 0x0A32 && c <= 0x0A33) ||
3826 (c >= 0x0A35 && c <= 0x0A36) ||
3827 (c >= 0x0A38 && c <= 0x0A39) ||
3828 (c >= 0x0A59 && c <= 0x0A5C) ||
3829 c == 0x0A5E ||
3830 (c >= 0x0A72 && c <= 0x0A74) ||
3831 (c >= 0x0A85 && c <= 0x0A8B) ||
3832 c == 0x0A8D ||
3833 (c >= 0x0A8F && c <= 0x0A91) ||
3834 (c >= 0x0A93 && c <= 0x0AA8) ||
3835 (c >= 0x0AAA && c <= 0x0AB0) ||
3836 (c >= 0x0AB2 && c <= 0x0AB3) ||
3837 (c >= 0x0AB5 && c <= 0x0AB9) ||
3838 c == 0x0ABD ||
3839 c == 0x0AE0 ||
3840 (c >= 0x0B05 && c <= 0x0B0C) ||
3841 (c >= 0x0B0F && c <= 0x0B10) ||
3842 (c >= 0x0B13 && c <= 0x0B28) ||
3843 (c >= 0x0B2A && c <= 0x0B30) ||
3844 (c >= 0x0B32 && c <= 0x0B33) ||
3845 (c >= 0x0B36 && c <= 0x0B39) ||
3846 c == 0x0B3D ||
3847 (c >= 0x0B5C && c <= 0x0B5D) ||
3848 (c >= 0x0B5F && c <= 0x0B61) ||
3849 (c >= 0x0B85 && c <= 0x0B8A) ||
3850 (c >= 0x0B8E && c <= 0x0B90) ||
3851 (c >= 0x0B92 && c <= 0x0B95) ||
3852 (c >= 0x0B99 && c <= 0x0B9A) ||
3853 c == 0x0B9C ||
3854 (c >= 0x0B9E && c <= 0x0B9F) ||
3855 (c >= 0x0BA3 && c <= 0x0BA4) ||
3856 (c >= 0x0BA8 && c <= 0x0BAA) ||
3857 (c >= 0x0BAE && c <= 0x0BB5) ||
3858 (c >= 0x0BB7 && c <= 0x0BB9) ||
3859 (c >= 0x0C05 && c <= 0x0C0C) ||
3860 (c >= 0x0C0E && c <= 0x0C10) ||
3861 (c >= 0x0C12 && c <= 0x0C28) ||
3862 (c >= 0x0C2A && c <= 0x0C33) ||
3863 (c >= 0x0C35 && c <= 0x0C39) ||
3864 (c >= 0x0C60 && c <= 0x0C61) ||
3865 (c >= 0x0C85 && c <= 0x0C8C) ||
3866 (c >= 0x0C8E && c <= 0x0C90) ||
3867 (c >= 0x0C92 && c <= 0x0CA8) ||
3868 (c >= 0x0CAA && c <= 0x0CB3) ||
3869 (c >= 0x0CB5 && c <= 0x0CB9) ||
3870 c == 0x0CDE ||
3871 (c >= 0x0CE0 && c <= 0x0CE1) ||
3872 (c >= 0x0D05 && c <= 0x0D0C) ||
3873 (c >= 0x0D0E && c <= 0x0D10) ||
3874 (c >= 0x0D12 && c <= 0x0D28) ||
3875 (c >= 0x0D2A && c <= 0x0D39) ||
3876 (c >= 0x0D60 && c <= 0x0D61) ||
3877 (c >= 0x0E01 && c <= 0x0E2E) ||
3878 c == 0x0E30 ||
3879 (c >= 0x0E32 && c <= 0x0E33) ||
3880 (c >= 0x0E40 && c <= 0x0E45) ||
3881 (c >= 0x0E81 && c <= 0x0E82) ||
3882 c == 0x0E84 ||
3883 (c >= 0x0E87 && c <= 0x0E88) ||
3884 c == 0x0E8A ||
3885 c == 0x0E8D ||
3886 (c >= 0x0E94 && c <= 0x0E97) ||
3887 (c >= 0x0E99 && c <= 0x0E9F) ||
3888 (c >= 0x0EA1 && c <= 0x0EA3) ||
3889 c == 0x0EA5 ||
3890 c == 0x0EA7 ||
3891 (c >= 0x0EAA && c <= 0x0EAB) ||
3892 (c >= 0x0EAD && c <= 0x0EAE) ||
3893 c == 0x0EB0 ||
3894 (c >= 0x0EB2 && c <= 0x0EB3) ||
3895 c == 0x0EBD ||
3896 (c >= 0x0EC0 && c <= 0x0EC4) ||
3897 (c >= 0x0F40 && c <= 0x0F47) ||
3898 (c >= 0x0F49 && c <= 0x0F69) ||
3899 (c >= 0x10A0 && c <= 0x10C5) ||
3900 (c >= 0x10D0 && c <= 0x10F6) ||
3901 c == 0x1100 ||
3902 (c >= 0x1102 && c <= 0x1103) ||
3903 (c >= 0x1105 && c <= 0x1107) ||
3904 c == 0x1109 ||
3905 (c >= 0x110B && c <= 0x110C) ||
3906 (c >= 0x110E && c <= 0x1112) ||
3907 c == 0x113C ||
3908 c == 0x113E ||
3909 c == 0x1140 ||
3910 c == 0x114C ||
3911 c == 0x114E ||
3912 c == 0x1150 ||
3913 (c >= 0x1154 && c <= 0x1155) ||
3914 c == 0x1159 ||
3915 (c >= 0x115F && c <= 0x1161) ||
3916 c == 0x1163 ||
3917 c == 0x1165 ||
3918 c == 0x1167 ||
3919 c == 0x1169 ||
3920 (c >= 0x116D && c <= 0x116E) ||
3921 (c >= 0x1172 && c <= 0x1173) ||
3922 c == 0x1175 ||
3923 c == 0x119E ||
3924 c == 0x11A8 ||
3925 c == 0x11AB ||
3926 (c >= 0x11AE && c <= 0x11AF) ||
3927 (c >= 0x11B7 && c <= 0x11B8) ||
3928 c == 0x11BA ||
3929 (c >= 0x11BC && c <= 0x11C2) ||
3930 c == 0x11EB ||
3931 c == 0x11F0 ||
3932 c == 0x11F9 ||
3933 (c >= 0x1E00 && c <= 0x1E9B) ||
3934 (c >= 0x1EA0 && c <= 0x1EF9) ||
3935 (c >= 0x1F00 && c <= 0x1F15) ||
3936 (c >= 0x1F18 && c <= 0x1F1D) ||
3937 (c >= 0x1F20 && c <= 0x1F45) ||
3938 (c >= 0x1F48 && c <= 0x1F4D) ||
3939 (c >= 0x1F50 && c <= 0x1F57) ||
3940 c == 0x1F59 ||
3941 c == 0x1F5B ||
3942 c == 0x1F5D ||
3943 (c >= 0x1F5F && c <= 0x1F7D) ||
3944 (c >= 0x1F80 && c <= 0x1FB4) ||
3945 (c >= 0x1FB6 && c <= 0x1FBC) ||
3946 c == 0x1FBE ||
3947 (c >= 0x1FC2 && c <= 0x1FC4) ||
3948 (c >= 0x1FC6 && c <= 0x1FCC) ||
3949 (c >= 0x1FD0 && c <= 0x1FD3) ||
3950 (c >= 0x1FD6 && c <= 0x1FDB) ||
3951 (c >= 0x1FE0 && c <= 0x1FEC) ||
3952 (c >= 0x1FF2 && c <= 0x1FF4) ||
3953 (c >= 0x1FF6 && c <= 0x1FFC) ||
3954 c == 0x2126 ||
3955 (c >= 0x212A && c <= 0x212B) ||
3956 c == 0x212E ||
3957 (c >= 0x2180 && c <= 0x2182) ||
3958 (c >= 0x3041 && c <= 0x3094) ||
3959 (c >= 0x30A1 && c <= 0x30FA) ||
3960 (c >= 0x3105 && c <= 0x312C) ||
3961 (c >= 0xAC00 && c <= 0xD7A3))
3962 return true; // BaseChar
3963 if ((c >= 0x4e00 && c <= 0x9fa5) ||
3964 c == 0x3007 ||
3965 (c >= 0x3021 && c <= 0x3029))
3966 return true; // Ideographic
3967 return false;
3971 * Indicates whether the specified Unicode character matches the Digit
3972 * production.
3974 public static boolean isDigit(int c)
3976 return ((c >= 0x0030 && c <= 0x0039) ||
3977 (c >= 0x0660 && c <= 0x0669) ||
3978 (c >= 0x06F0 && c <= 0x06F9) ||
3979 (c >= 0x0966 && c <= 0x096F) ||
3980 (c >= 0x09E6 && c <= 0x09EF) ||
3981 (c >= 0x0A66 && c <= 0x0A6F) ||
3982 (c >= 0x0AE6 && c <= 0x0AEF) ||
3983 (c >= 0x0B66 && c <= 0x0B6F) ||
3984 (c >= 0x0BE7 && c <= 0x0BEF) ||
3985 (c >= 0x0C66 && c <= 0x0C6F) ||
3986 (c >= 0x0CE6 && c <= 0x0CEF) ||
3987 (c >= 0x0D66 && c <= 0x0D6F) ||
3988 (c >= 0x0E50 && c <= 0x0E59) ||
3989 (c >= 0x0ED0 && c <= 0x0ED9) ||
3990 (c >= 0x0F20 && c <= 0x0F29));
3994 * Indicates whether the specified Unicode character matches the
3995 * CombiningChar production.
3997 public static boolean isCombiningChar(int c)
3999 return ((c >= 0x0300 && c <= 0x0345) ||
4000 (c >= 0x0360 && c <= 0x0361) ||
4001 (c >= 0x0483 && c <= 0x0486) ||
4002 (c >= 0x0591 && c <= 0x05A1) ||
4003 (c >= 0x05A3 && c <= 0x05B9) ||
4004 (c >= 0x05BB && c <= 0x05BD) ||
4005 c == 0x05BF ||
4006 (c >= 0x05C1 && c <= 0x05C2) ||
4007 c == 0x05C4 ||
4008 (c >= 0x064B && c <= 0x0652) ||
4009 c == 0x0670 ||
4010 (c >= 0x06D6 && c <= 0x06DC) ||
4011 (c >= 0x06DD && c <= 0x06DF) ||
4012 (c >= 0x06E0 && c <= 0x06E4) ||
4013 (c >= 0x06E7 && c <= 0x06E8) ||
4014 (c >= 0x06EA && c <= 0x06ED) ||
4015 (c >= 0x0901 && c <= 0x0903) ||
4016 c == 0x093C ||
4017 (c >= 0x093E && c <= 0x094C) ||
4018 c == 0x094D ||
4019 (c >= 0x0951 && c <= 0x0954) ||
4020 (c >= 0x0962 && c <= 0x0963) ||
4021 (c >= 0x0981 && c <= 0x0983) ||
4022 c == 0x09BC ||
4023 c == 0x09BE ||
4024 c == 0x09BF ||
4025 (c >= 0x09C0 && c <= 0x09C4) ||
4026 (c >= 0x09C7 && c <= 0x09C8) ||
4027 (c >= 0x09CB && c <= 0x09CD) ||
4028 c == 0x09D7 ||
4029 (c >= 0x09E2 && c <= 0x09E3) ||
4030 c == 0x0A02 ||
4031 c == 0x0A3C ||
4032 c == 0x0A3E ||
4033 c == 0x0A3F ||
4034 (c >= 0x0A40 && c <= 0x0A42) ||
4035 (c >= 0x0A47 && c <= 0x0A48) ||
4036 (c >= 0x0A4B && c <= 0x0A4D) ||
4037 (c >= 0x0A70 && c <= 0x0A71) ||
4038 (c >= 0x0A81 && c <= 0x0A83) ||
4039 c == 0x0ABC ||
4040 (c >= 0x0ABE && c <= 0x0AC5) ||
4041 (c >= 0x0AC7 && c <= 0x0AC9) ||
4042 (c >= 0x0ACB && c <= 0x0ACD) ||
4043 (c >= 0x0B01 && c <= 0x0B03) ||
4044 c == 0x0B3C ||
4045 (c >= 0x0B3E && c <= 0x0B43) ||
4046 (c >= 0x0B47 && c <= 0x0B48) ||
4047 (c >= 0x0B4B && c <= 0x0B4D) ||
4048 (c >= 0x0B56 && c <= 0x0B57) ||
4049 (c >= 0x0B82 && c <= 0x0B83) ||
4050 (c >= 0x0BBE && c <= 0x0BC2) ||
4051 (c >= 0x0BC6 && c <= 0x0BC8) ||
4052 (c >= 0x0BCA && c <= 0x0BCD) ||
4053 c == 0x0BD7 ||
4054 (c >= 0x0C01 && c <= 0x0C03) ||
4055 (c >= 0x0C3E && c <= 0x0C44) ||
4056 (c >= 0x0C46 && c <= 0x0C48) ||
4057 (c >= 0x0C4A && c <= 0x0C4D) ||
4058 (c >= 0x0C55 && c <= 0x0C56) ||
4059 (c >= 0x0C82 && c <= 0x0C83) ||
4060 (c >= 0x0CBE && c <= 0x0CC4) ||
4061 (c >= 0x0CC6 && c <= 0x0CC8) ||
4062 (c >= 0x0CCA && c <= 0x0CCD) ||
4063 (c >= 0x0CD5 && c <= 0x0CD6) ||
4064 (c >= 0x0D02 && c <= 0x0D03) ||
4065 (c >= 0x0D3E && c <= 0x0D43) ||
4066 (c >= 0x0D46 && c <= 0x0D48) ||
4067 (c >= 0x0D4A && c <= 0x0D4D) ||
4068 c == 0x0D57 ||
4069 c == 0x0E31 ||
4070 (c >= 0x0E34 && c <= 0x0E3A) ||
4071 (c >= 0x0E47 && c <= 0x0E4E) ||
4072 c == 0x0EB1 ||
4073 (c >= 0x0EB4 && c <= 0x0EB9) ||
4074 (c >= 0x0EBB && c <= 0x0EBC) ||
4075 (c >= 0x0EC8 && c <= 0x0ECD) ||
4076 (c >= 0x0F18 && c <= 0x0F19) ||
4077 c == 0x0F35 ||
4078 c == 0x0F37 ||
4079 c == 0x0F39 ||
4080 c == 0x0F3E ||
4081 c == 0x0F3F ||
4082 (c >= 0x0F71 && c <= 0x0F84) ||
4083 (c >= 0x0F86 && c <= 0x0F8B) ||
4084 (c >= 0x0F90 && c <= 0x0F95) ||
4085 c == 0x0F97 ||
4086 (c >= 0x0F99 && c <= 0x0FAD) ||
4087 (c >= 0x0FB1 && c <= 0x0FB7) ||
4088 c == 0x0FB9 ||
4089 (c >= 0x20D0 && c <= 0x20DC) ||
4090 c == 0x20E1 ||
4091 (c >= 0x302A && c <= 0x302F) ||
4092 c == 0x3099 ||
4093 c == 0x309A);
4097 * Indicates whether the specified Unicode character matches the Extender
4098 * production.
4100 public static boolean isExtender(int c)
4102 return (c == 0x00B7 ||
4103 c == 0x02D0 ||
4104 c == 0x02D1 ||
4105 c == 0x0387 ||
4106 c == 0x0640 ||
4107 c == 0x0E46 ||
4108 c == 0x0EC6 ||
4109 c == 0x3005 ||
4110 (c >= 0x3031 && c <= 0x3035) ||
4111 (c >= 0x309D && c <= 0x309E) ||
4112 (c >= 0x30FC && c <= 0x30FE));
4116 * Indicates whether the specified Unicode character matches the Char
4117 * production.
4119 public static boolean isChar(int c)
4121 return (c >= 0x20 && c < 0xd800) ||
4122 (c >= 0xe00 && c < 0xfffe) ||
4123 (c >= 0x10000 && c < 0x110000) ||
4124 c == 0xa || c == 0x9 || c == 0xd;
4128 * Interns the specified text or not, depending on the value of
4129 * stringInterning.
4131 private String intern(String text)
4133 return stringInterning ? text.intern() : text;
4137 * Report a parsing error.
4139 private void error(String message)
4140 throws XMLStreamException
4142 error(message, null);
4146 * Report a parsing error.
4148 private void error(String message, Object info)
4149 throws XMLStreamException
4151 if (info != null)
4153 if (info instanceof String)
4154 message += ": \"" + ((String) info) + "\"";
4155 else if (info instanceof Character)
4156 message += ": '" + ((Character) info) + "'";
4158 throw new XMLStreamException(message);
4162 * Perform validation of a start-element event.
4164 private void validateStartElement(String elementName)
4165 throws XMLStreamException
4167 if (currentContentModel == null)
4169 // root element
4170 // VC: Root Element Type
4171 if (!elementName.equals(doctype.rootName))
4172 error("root element name must match name in DTD");
4173 return;
4175 // VC: Element Valid
4176 switch (currentContentModel.type)
4178 case ContentModel.EMPTY:
4179 error("child element found in empty element", elementName);
4180 break;
4181 case ContentModel.ELEMENT:
4182 LinkedList ctx = (LinkedList) validationStack.getLast();
4183 ctx.add(elementName);
4184 break;
4185 case ContentModel.MIXED:
4186 MixedContentModel mm = (MixedContentModel) currentContentModel;
4187 if (!mm.containsName(elementName))
4188 error("illegal element for content model", elementName);
4189 break;
4194 * Perform validation of an end-element event.
4196 private void validateEndElement()
4197 throws XMLStreamException
4199 if (currentContentModel == null)
4201 // root element
4202 // VC: IDREF
4203 if (!idrefs.containsAll(ids))
4204 error("IDREF values must match the value of some ID attribute");
4205 return;
4207 // VC: Element Valid
4208 switch (currentContentModel.type)
4210 case ContentModel.ELEMENT:
4211 LinkedList ctx = (LinkedList) validationStack.getLast();
4212 ElementContentModel ecm = (ElementContentModel) currentContentModel;
4213 validateElementContent(ecm, ctx);
4214 break;
4219 * Perform validation of character data.
4221 private void validatePCData(String text)
4222 throws XMLStreamException
4224 // VC: Element Valid
4225 switch (currentContentModel.type)
4227 case ContentModel.EMPTY:
4228 error("character data found in empty element", text);
4229 break;
4230 case ContentModel.ELEMENT:
4231 boolean white = true;
4232 int len = text.length();
4233 for (int i = 0; i < len; i++)
4235 char c = text.charAt(i);
4236 if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4238 white = false;
4239 break;
4242 if (!white)
4243 error("character data found in element with element content", text);
4244 else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4245 // VC: Standalone Document Declaration
4246 error("whitespace in element content of externally declared " +
4247 "element in standalone document");
4248 break;
4253 * Validates the specified validation context (list of child elements)
4254 * against the element content model for the current element.
4256 private void validateElementContent(ElementContentModel model,
4257 LinkedList children)
4258 throws XMLStreamException
4260 // Use regular expression
4261 CPStringBuilder buf = new CPStringBuilder();
4262 for (Iterator i = children.iterator(); i.hasNext(); )
4264 buf.append((String) i.next());
4265 buf.append(' ');
4267 String c = buf.toString();
4268 String regex = createRegularExpression(model);
4269 if (!c.matches(regex))
4270 error("element content "+model.text+" does not match expression "+regex, c);
4274 * Creates the regular expression used to validate an element content
4275 * model.
4277 private String createRegularExpression(ElementContentModel model)
4279 if (model.regex == null)
4281 CPStringBuilder buf = new CPStringBuilder();
4282 buf.append('(');
4283 for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4285 ContentParticle cp = (ContentParticle) i.next();
4286 if (cp.content instanceof String)
4288 buf.append('(');
4289 buf.append((String) cp.content);
4290 buf.append(' ');
4291 buf.append(')');
4292 if (cp.max == -1)
4294 if (cp.min == 0)
4295 buf.append('*');
4296 else
4297 buf.append('+');
4299 else if (cp.min == 0)
4300 buf.append('?');
4302 else
4304 ElementContentModel ecm = (ElementContentModel) cp.content;
4305 buf.append(createRegularExpression(ecm));
4307 if (model.or && i.hasNext())
4308 buf.append('|');
4310 buf.append(')');
4311 if (model.max == -1)
4313 if (model.min == 0)
4314 buf.append('*');
4315 else
4316 buf.append('+');
4318 else if (model.min == 0)
4319 buf.append('?');
4320 model.regex = buf.toString();
4322 return model.regex;
4326 * Performs validation of a document type declaration event.
4328 void validateDoctype()
4329 throws XMLStreamException
4331 for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4333 Map.Entry entry = (Map.Entry) i.next();
4334 Object entity = entry.getValue();
4335 if (entity instanceof ExternalIds)
4337 ExternalIds ids = (ExternalIds) entity;
4338 if (ids.notationName != null)
4340 // VC: Notation Declared
4341 ExternalIds notation = doctype.getNotation(ids.notationName);
4342 if (notation == null)
4343 error("Notation name must match the declared name of a " +
4344 "notation", ids.notationName);
4351 * Simple test harness for reading an XML file.
4352 * args[0] is the filename of the XML file
4353 * If args[1] is "-x", enable XInclude processing
4355 public static void main(String[] args)
4356 throws Exception
4358 boolean validating = false;
4359 boolean namespaceAware = false;
4360 boolean xIncludeAware = false;
4361 int pos = 0;
4362 while (pos < args.length && args[pos].startsWith("-"))
4364 if ("-x".equals(args[pos]))
4365 xIncludeAware = true;
4366 else if ("-v".equals(args[pos]))
4367 validating = true;
4368 else if ("-n".equals(args[pos]))
4369 namespaceAware = true;
4370 pos++;
4372 if (pos >= args.length)
4374 System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4375 System.out.println("\t-n: use namespace aware mode");
4376 System.out.println("\t-v: use validating parser");
4377 System.out.println("\t-x: use XInclude aware mode");
4378 System.exit(2);
4380 while (pos < args.length)
4382 XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4383 absolutize(null, args[pos]),
4384 validating, // validating
4385 namespaceAware, // namespaceAware
4386 true, // coalescing,
4387 true, // replaceERefs
4388 true, // externalEntities
4389 true, // supportDTD
4390 true, // baseAware
4391 true, // stringInterning
4392 true, // extendedEventTypes
4393 null,
4394 null);
4395 XMLStreamReader reader = p;
4396 if (xIncludeAware)
4397 reader = new XIncludeFilter(p, args[pos], true, true, true);
4400 int event;
4401 //do
4402 while (reader.hasNext())
4404 event = reader.next();
4405 Location loc = reader.getLocation();
4406 System.out.print(loc.getLineNumber() + ":" +
4407 loc.getColumnNumber() + " ");
4408 switch (event)
4410 case XMLStreamConstants.START_DOCUMENT:
4411 System.out.println("START_DOCUMENT version=" +
4412 reader.getVersion() +
4413 " encoding=" +
4414 reader.getEncoding());
4415 break;
4416 case XMLStreamConstants.END_DOCUMENT:
4417 System.out.println("END_DOCUMENT");
4418 break;
4419 case XMLStreamConstants.START_ELEMENT:
4420 System.out.println("START_ELEMENT " +
4421 reader.getName());
4422 int l = reader.getNamespaceCount();
4423 for (int i = 0; i < l; i++)
4424 System.out.println("\tnamespace " +
4425 reader.getNamespacePrefix(i) + "='" +
4426 reader.getNamespaceURI(i)+"'");
4427 l = reader.getAttributeCount();
4428 for (int i = 0; i < l; i++)
4429 System.out.println("\tattribute " +
4430 reader.getAttributeName(i) + "='" +
4431 reader.getAttributeValue(i) + "'");
4432 break;
4433 case XMLStreamConstants.END_ELEMENT:
4434 System.out.println("END_ELEMENT " + reader.getName());
4435 break;
4436 case XMLStreamConstants.CHARACTERS:
4437 System.out.println("CHARACTERS '" +
4438 encodeText(reader.getText()) + "'");
4439 break;
4440 case XMLStreamConstants.CDATA:
4441 System.out.println("CDATA '" +
4442 encodeText(reader.getText()) + "'");
4443 break;
4444 case XMLStreamConstants.SPACE:
4445 System.out.println("SPACE '" +
4446 encodeText(reader.getText()) + "'");
4447 break;
4448 case XMLStreamConstants.DTD:
4449 System.out.println("DTD " + reader.getText());
4450 break;
4451 case XMLStreamConstants.ENTITY_REFERENCE:
4452 System.out.println("ENTITY_REFERENCE " + reader.getText());
4453 break;
4454 case XMLStreamConstants.COMMENT:
4455 System.out.println("COMMENT '" +
4456 encodeText(reader.getText()) + "'");
4457 break;
4458 case XMLStreamConstants.PROCESSING_INSTRUCTION:
4459 System.out.println("PROCESSING_INSTRUCTION " +
4460 reader.getPITarget() + " " +
4461 reader.getPIData());
4462 break;
4463 case START_ENTITY:
4464 System.out.println("START_ENTITY " + reader.getText());
4465 break;
4466 case END_ENTITY:
4467 System.out.println("END_ENTITY " + reader.getText());
4468 break;
4469 default:
4470 System.out.println("Unknown event: " + event);
4474 catch (XMLStreamException e)
4476 Location l = reader.getLocation();
4477 System.out.println("At line "+l.getLineNumber()+
4478 ", column "+l.getColumnNumber()+
4479 " of "+l.getSystemId());
4480 throw e;
4482 pos++;
4487 * Escapes control characters in the specified text. For debugging.
4489 private static String encodeText(String text)
4491 CPStringBuilder b = new CPStringBuilder();
4492 int len = text.length();
4493 for (int i = 0; i < len; i++)
4495 char c = text.charAt(i);
4496 switch (c)
4498 case '\t':
4499 b.append("\\t");
4500 break;
4501 case '\n':
4502 b.append("\\n");
4503 break;
4504 case '\r':
4505 b.append("\\r");
4506 break;
4507 default:
4508 b.append(c);
4511 return b.toString();
4515 * An attribute instance.
4517 class Attribute
4521 * Attribute name.
4523 final String name;
4526 * Attribute type as declared in the DTD, or CDATA otherwise.
4528 final String type;
4531 * Whether the attribute was specified or defaulted.
4533 final boolean specified;
4536 * The attribute value.
4538 final String value;
4541 * The namespace prefix.
4543 final String prefix;
4546 * The namespace local-name.
4548 final String localName;
4550 Attribute(String name, String type, boolean specified, String value)
4552 this.name = name;
4553 this.type = type;
4554 this.specified = specified;
4555 this.value = value;
4556 int ci = name.indexOf(':');
4557 if (ci == -1)
4559 prefix = null;
4560 localName = intern(name);
4562 else
4564 prefix = intern(name.substring(0, ci));
4565 localName = intern(name.substring(ci + 1));
4569 public boolean equals(Object other)
4571 if (other instanceof Attribute)
4573 Attribute a = (Attribute) other;
4574 if (namespaceAware)
4576 if (!a.localName.equals(localName))
4577 return false;
4578 String auri = getNamespaceURI(a.prefix);
4579 String uri = getNamespaceURI(prefix);
4580 if (uri == null && (auri == null ||
4581 (input.xml11 && "".equals(auri))))
4582 return true;
4583 if (uri != null)
4585 if ("".equals(uri) && input.xml11 && "".equals(auri))
4586 return true;
4587 return uri.equals(auri);
4589 return false;
4591 else
4592 return a.name.equals(name);
4594 return false;
4597 public String toString()
4599 CPStringBuilder buf = new CPStringBuilder(getClass().getName());
4600 buf.append('[');
4601 buf.append("name=");
4602 buf.append(name);
4603 if (value != null)
4605 buf.append(",value=");
4606 buf.append(value);
4608 if (type != null)
4610 buf.append(",type=");
4611 buf.append(type);
4613 if (specified)
4614 buf.append(",specified");
4615 buf.append(']');
4616 return buf.toString();
4622 * Representation of a DTD.
4624 class Doctype
4628 * Name of the root element.
4630 final String rootName;
4633 * Public ID, if any, of external subset.
4635 final String publicId;
4638 * System ID (URL), if any, of external subset.
4640 final String systemId;
4643 * Map of element names to content models.
4645 private final LinkedHashMap elements = new LinkedHashMap();
4648 * Map of element names to maps of attribute declarations.
4650 private final LinkedHashMap attlists = new LinkedHashMap();
4653 * Map of entity names to entities (String or ExternalIds).
4655 private final LinkedHashMap entities = new LinkedHashMap();
4658 * Map of notation names to ExternalIds.
4660 private final LinkedHashMap notations = new LinkedHashMap();
4663 * Map of anonymous keys to comments.
4665 private final LinkedHashMap comments = new LinkedHashMap();
4668 * Map of anonymous keys to processing instructions (String[2]
4669 * containing {target, data}).
4671 private final LinkedHashMap pis = new LinkedHashMap();
4674 * List of keys to all markup entries in the DTD.
4676 private final LinkedList entries = new LinkedList();
4679 * Set of the entities defined in the external subset.
4681 private final HashSet externalEntities = new HashSet();
4684 * Set of the notations defined in the external subset.
4686 private final HashSet externalNotations = new HashSet();
4689 * Counter for making anonymous keys.
4691 private int anon = 1;
4694 * Constructor.
4696 Doctype(String rootName, String publicId, String systemId)
4698 this.rootName = rootName;
4699 this.publicId = publicId;
4700 this.systemId = systemId;
4704 * Adds an element declaration.
4705 * @param name the element name
4706 * @param text the content model text
4707 * @param model the parsed content model
4709 void addElementDecl(String name, String text, ContentModel model)
4711 if (elements.containsKey(name))
4712 return;
4713 model.text = text;
4714 model.external = (inputStack.size() != 1);
4715 elements.put(name, model);
4716 entries.add("E" + name);
4720 * Adds an attribute declaration.
4721 * @param ename the element name
4722 * @param aname the attribute name
4723 * @param decl the attribute declaration details
4725 void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4727 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4728 if (attlist == null)
4730 attlist = new LinkedHashMap();
4731 attlists.put(ename, attlist);
4733 else if (attlist.containsKey(aname))
4734 return;
4735 attlist.put(aname, decl);
4736 String key = "A" + ename;
4737 if (!entries.contains(key))
4738 entries.add(key);
4742 * Adds an entity declaration.
4743 * @param name the entity name
4744 * @param text the entity replacement text
4745 * @param inExternalSubset if we are in the exernal subset
4747 void addEntityDecl(String name, String text, boolean inExternalSubset)
4749 if (entities.containsKey(name))
4750 return;
4751 entities.put(name, text);
4752 entries.add("e" + name);
4753 if (inExternalSubset)
4754 externalEntities.add(name);
4758 * Adds an entity declaration.
4759 * @param name the entity name
4760 * @param ids the external IDs
4761 * @param inExternalSubset if we are in the exernal subset
4763 void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4765 if (entities.containsKey(name))
4766 return;
4767 entities.put(name, ids);
4768 entries.add("e" + name);
4769 if (inExternalSubset)
4770 externalEntities.add(name);
4774 * Adds a notation declaration.
4775 * @param name the notation name
4776 * @param ids the external IDs
4777 * @param inExternalSubset if we are in the exernal subset
4779 void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4781 if (notations.containsKey(name))
4782 return;
4783 notations.put(name, ids);
4784 entries.add("n" + name);
4785 if (inExternalSubset)
4786 externalNotations.add(name);
4790 * Adds a comment.
4792 void addComment(String text)
4794 String key = Integer.toString(anon++);
4795 comments.put(key, text);
4796 entries.add("c" + key);
4800 * Adds a processing instruction.
4802 void addPI(String target, String data)
4804 String key = Integer.toString(anon++);
4805 pis.put(key, new String[] {target, data});
4806 entries.add("p" + key);
4810 * Returns the content model for the specified element.
4811 * @param name the element name
4813 ContentModel getElementModel(String name)
4815 return (ContentModel) elements.get(name);
4819 * Returns the attribute definition for the given attribute
4820 * @param ename the element name
4821 * @param aname the attribute name
4823 AttributeDecl getAttributeDecl(String ename, String aname)
4825 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4826 return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4830 * Indicates whether the specified attribute was declared in the DTD.
4831 * @param ename the element name
4832 * @param aname the attribute name
4834 boolean isAttributeDeclared(String ename, String aname)
4836 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4837 return (attlist == null) ? false : attlist.containsKey(aname);
4841 * Returns an iterator over the entries in the attribute list for the
4842 * given element.
4843 * @param ename the element name
4845 Iterator attlistIterator(String ename)
4847 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4848 return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4849 attlist.entrySet().iterator();
4853 * Returns the entity (String or ExternalIds) for the given entity name.
4855 Object getEntity(String name)
4857 return entities.get(name);
4861 * Indicates whether the specified entity was declared in the external
4862 * subset.
4864 boolean isEntityExternal(String name)
4866 return externalEntities.contains(name);
4870 * Returns an iterator over the entity map entries.
4872 Iterator entityIterator()
4874 return entities.entrySet().iterator();
4878 * Returns the notation IDs for the given notation name.
4880 ExternalIds getNotation(String name)
4882 return (ExternalIds) notations.get(name);
4886 * Indicates whether the specified notation was declared in the external
4887 * subset.
4889 boolean isNotationExternal(String name)
4891 return externalNotations.contains(name);
4895 * Returns the comment associated with the specified (anonymous) key.
4897 String getComment(String key)
4899 return (String) comments.get(key);
4903 * Returns the processing instruction associated with the specified
4904 * (anonymous) key.
4906 String[] getPI(String key)
4908 return (String[]) pis.get(key);
4912 * Returns an iterator over the keys of the markup entries in this DTD,
4913 * in the order declared.
4915 Iterator entryIterator()
4917 return entries.iterator();
/**
 * Combination of an ExternalID and an optional NDataDecl.
 * A plain holder for three mutable strings.
 */
class ExternalIds
{

  /** The public ID. */
  String publicId;

  /** The system ID. */
  String systemId;

  /** The notation name declared with the NDATA keyword. */
  String notationName;

}
/**
 * A content model.
 * The type is one of the constants declared here; the occurrence bounds
 * start out as exactly one.
 */
abstract class ContentModel
{

  static final int EMPTY = 0;
  static final int ANY = 1;
  static final int ELEMENT = 2;
  static final int MIXED = 3;

  /** Minimum number of occurrences. */
  int min = 1;

  /** Maximum number of occurrences. */
  int max = 1;

  /** One of EMPTY, ANY, ELEMENT or MIXED. */
  final int type;

  /** The content model text. */
  String text;

  /** Flag recorded with the model when it is declared. */
  boolean external;

  /**
   * Creates a content model of the given type.
   * @param type one of EMPTY, ANY, ELEMENT or MIXED
   */
  ContentModel(int type)
  {
    this.type = type;
  }

}
4970 * The EMPTY content model.
4972 class EmptyContentModel
4973 extends ContentModel
4976 EmptyContentModel()
4978 super(ContentModel.EMPTY);
4979 min = 0;
4980 max = 0;
4986 * The ANY content model.
4988 class AnyContentModel
4989 extends ContentModel
4992 AnyContentModel()
4994 super(ContentModel.ANY);
4995 min = 0;
4996 max = -1;
5002 * An element content model.
5004 class ElementContentModel
5005 extends ContentModel
5008 LinkedList contentParticles;
5009 boolean or;
5010 String regex; // regular expression cache
5012 ElementContentModel()
5014 super(ContentModel.ELEMENT);
5015 contentParticles = new LinkedList();
5018 void addContentParticle(ContentParticle cp)
5020 contentParticles.add(cp);
/**
 * A single particle of an element content model, with its occurrence
 * bounds. Both bounds start out as one.
 */
class ContentParticle
{

  /** Minimum number of occurrences. */
  int min;

  /** Maximum number of occurrences. */
  int max;

  /** Name (String) or ElementContentModel. */
  Object content;

  ContentParticle()
  {
    min = 1;
    max = 1;
  }

}
5035 * A mixed content model.
5037 class MixedContentModel
5038 extends ContentModel
5041 private HashSet names;
5043 MixedContentModel()
5045 super(ContentModel.MIXED);
5046 names = new HashSet();
5049 void addName(String name)
5051 names.add(name);
5054 boolean containsName(String name)
5056 return names.contains(name);
/**
 * An attribute definition. All fields are fixed at construction time.
 */
class AttributeDecl
{

  /**
   * The attribute type (CDATA, ID, etc).
   */
  final String type;

  /**
   * The default value.
   */
  final String value;

  /**
   * The value type (#FIXED, #IMPLIED, etc).
   */
  final int valueType;

  /**
   * The enumeration text.
   */
  final String enumeration;

  /**
   * The enumeration tokens.
   */
  final HashSet values;

  /**
   * Whether this attribute declaration occurred in the external subset.
   */
  final boolean external;

  /**
   * Creates an attribute definition from its component values.
   * @param type the attribute type
   * @param value the default value
   * @param valueType the value type
   * @param enumeration the enumeration text
   * @param values the enumeration tokens
   * @param external whether the declaration occurred in the external
   * subset
   */
  AttributeDecl(String type, String value,
                int valueType, String enumeration,
                HashSet values, boolean external)
  {
    this.external = external;
    this.values = values;
    this.enumeration = enumeration;
    this.valueType = valueType;
    this.value = value;
    this.type = type;
  }

}
5112 * An XML input source.
5114 static class Input
5115 implements Location
5118 int line = 1, markLine;
5119 int column, markColumn;
5120 int offset, markOffset;
5121 final String publicId, systemId, name;
5122 final boolean report; // report start- and end-entity
5123 final boolean normalize; // normalize CR, etc to LF
5125 InputStream in;
5126 Reader reader;
5127 UnicodeReader unicodeReader;
5128 boolean initialized;
5129 boolean encodingDetected;
5130 String inputEncoding;
5131 boolean xml11;
5133 Input(InputStream in, Reader reader, String publicId, String systemId,
5134 String name, String inputEncoding, boolean report,
5135 boolean normalize)
5137 if (inputEncoding == null)
5138 inputEncoding = "UTF-8";
5139 this.inputEncoding = inputEncoding;
5140 this.publicId = publicId;
5141 this.systemId = systemId;
5142 this.name = name;
5143 this.report = report;
5144 this.normalize = normalize;
5145 if (in != null)
5147 if (reader != null)
5148 throw new IllegalStateException("both byte and char streams "+
5149 "specified");
5150 if (normalize)
5151 in = new CRLFInputStream(in);
5152 in = new BufferedInputStream(in);
5153 this.in = in;
5155 else
5157 this.reader = normalize ? new CRLFReader(reader) : reader;
5158 unicodeReader = new UnicodeReader(this.reader);
5160 initialized = false;
5163 // -- Location --
5165 public int getCharacterOffset()
5167 return offset;
5170 public int getColumnNumber()
5172 return column;
5175 public int getLineNumber()
5177 return line;
5180 public String getPublicId()
5182 return publicId;
5185 public String getSystemId()
5187 return systemId;
5190 void init()
5191 throws IOException
5193 if (initialized)
5194 return;
5195 if (in != null)
5196 detectEncoding();
5197 initialized = true;
5200 void mark(int len)
5201 throws IOException
5203 markOffset = offset;
5204 markLine = line;
5205 markColumn = column;
5206 if (unicodeReader != null)
5207 unicodeReader.mark(len);
5208 else
5209 in.mark(len);
5213 * Character read.
5215 int read()
5216 throws IOException
5218 offset++;
5219 int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5220 if (normalize &&
5221 (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5223 // Normalize CR etc to LF
5224 ret = 0x0a;
5226 // Locator handling
5227 if (ret == 0x0a)
5229 line++;
5230 column = 0;
5232 else
5233 column++;
5234 return ret;
5238 * Block read.
5240 int read(int[] b, int off, int len)
5241 throws IOException
5243 int ret;
5244 if (unicodeReader != null)
5246 ret = unicodeReader.read(b, off, len);
5248 else
5250 byte[] b2 = new byte[len];
5251 ret = in.read(b2, 0, len);
5252 if (ret != -1)
5254 String s = new String(b2, 0, ret, inputEncoding);
5255 int[] c = UnicodeReader.toCodePointArray(s);
5256 ret = c.length;
5257 System.arraycopy(c, 0, b, off, ret);
5260 if (ret != -1)
5262 // Locator handling
5263 for (int i = 0; i < ret; i++)
5265 int c = b[off + i];
5266 if (normalize &&
5267 (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5269 // Normalize CR etc to LF
5270 c = 0x0a;
5271 b[off + i] = c;
5273 if (c == 0x0a)
5275 line++;
5276 column = 0;
5278 else
5279 column++;
5282 return ret;
5285 void reset()
5286 throws IOException
5288 if (unicodeReader != null)
5289 unicodeReader.reset();
5290 else
5291 in.reset();
5292 offset = markOffset;
5293 line = markLine;
5294 column = markColumn;
5297 // Detection of input encoding
5299 private static final int[] SIGNATURE_UCS_4_1234 =
5300 new int[] { 0x00, 0x00, 0x00, 0x3c };
5301 private static final int[] SIGNATURE_UCS_4_4321 =
5302 new int[] { 0x3c, 0x00, 0x00, 0x00 };
5303 private static final int[] SIGNATURE_UCS_4_2143 =
5304 new int[] { 0x00, 0x00, 0x3c, 0x00 };
5305 private static final int[] SIGNATURE_UCS_4_3412 =
5306 new int[] { 0x00, 0x3c, 0x00, 0x00 };
5307 private static final int[] SIGNATURE_UCS_2_12 =
5308 new int[] { 0xfe, 0xff };
5309 private static final int[] SIGNATURE_UCS_2_21 =
5310 new int[] { 0xff, 0xfe };
5311 private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5312 new int[] { 0x00, 0x3c, 0x00, 0x3f };
5313 private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5314 new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5315 private static final int[] SIGNATURE_UTF_8 =
5316 new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5317 private static final int[] SIGNATURE_UTF_8_BOM =
5318 new int[] { 0xef, 0xbb, 0xbf };
5321 * Detect the input encoding.
5323 private void detectEncoding()
5324 throws IOException
5326 int[] signature = new int[4];
5327 in.mark(4);
5328 for (int i = 0; i < 4; i++)
5329 signature[i] = in.read();
5330 in.reset();
5332 // 4-byte encodings
5333 if (equals(SIGNATURE_UCS_4_1234, signature))
5335 in.read();
5336 in.read();
5337 in.read();
5338 in.read();
5339 setInputEncoding("UTF-32BE");
5340 encodingDetected = true;
5342 else if (equals(SIGNATURE_UCS_4_4321, signature))
5344 in.read();
5345 in.read();
5346 in.read();
5347 in.read();
5348 setInputEncoding("UTF-32LE");
5349 encodingDetected = true;
5351 else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5352 equals(SIGNATURE_UCS_4_3412, signature))
5353 throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5355 // 2-byte encodings
5356 else if (equals(SIGNATURE_UCS_2_12, signature))
5358 in.read();
5359 in.read();
5360 setInputEncoding("UTF-16BE");
5361 encodingDetected = true;
5363 else if (equals(SIGNATURE_UCS_2_21, signature))
5365 in.read();
5366 in.read();
5367 setInputEncoding("UTF-16LE");
5368 encodingDetected = true;
5370 else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5372 //setInputEncoding("UTF-16BE");
5373 throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5375 else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5377 //setInputEncoding("UTF-16LE");
5378 throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5380 // ASCII-derived encodings
5381 else if (equals(SIGNATURE_UTF_8, signature))
5383 // UTF-8 input encoding implied, TextDecl
5385 else if (equals(SIGNATURE_UTF_8_BOM, signature))
5387 in.read();
5388 in.read();
5389 in.read();
5390 setInputEncoding("UTF-8");
5391 encodingDetected = true;
5395 private static boolean equals(int[] b1, int[] b2)
5397 for (int i = 0; i < b1.length; i++)
5399 if (b1[i] != b2[i])
5400 return false;
5402 return true;
5405 void setInputEncoding(String encoding)
5406 throws IOException
5408 if (encoding.equals(inputEncoding))
5409 return;
5410 if ("UTF-16".equalsIgnoreCase(encoding) &&
5411 inputEncoding.startsWith("UTF-16"))
5412 return;
5413 if (encodingDetected)
5414 throw new UnsupportedEncodingException("document is not in its " +
5415 "declared encoding " +
5416 inputEncoding +
5417 ": " + encoding);
5418 inputEncoding = encoding;
5419 finalizeEncoding();
// Installs a character reader over the byte stream using the current
// inputEncoding, unless a reader is already present.
5422 void finalizeEncoding()
5423 throws IOException
// A reader already exists (supplied by the caller or created earlier).
5425 if (reader != null)
5426 return;
5427 reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5428 unicodeReader = new UnicodeReader(reader);
// From here on mark()/read()/reset() go through unicodeReader; record a
// mark with a read-ahead limit of 1 on the new reader.
5429 mark(1);