Imported GNU Classpath 0.20
[official-gcc.git] / libjava / classpath / gnu / xml / stream / XMLParser.java
blob6f10b9303dc7dae6064389b664cec484c88781e2
1 /* XMLParser.java --
2 Copyright (C) 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version.
38 Partly derived from code which carried the following notice:
40 Copyright (c) 1997, 1998 by Microstar Software Ltd.
42 AElfred is free for both commercial and non-commercial use and
43 redistribution, provided that Microstar's copyright and disclaimer are
44 retained intact. You are free to modify AElfred for your own use and
45 to redistribute AElfred with your modifications, provided that the
46 modifications are clearly documented.
48 This program is distributed in the hope that it will be useful, but
49 WITHOUT ANY WARRANTY; without even the implied warranty of
50 merchantability or fitness for a particular purpose. Please use it AT
51 YOUR OWN RISK.
54 package gnu.xml.stream;
56 import java.io.BufferedInputStream;
57 import java.io.EOFException;
58 import java.io.File;
59 import java.io.InputStream;
60 import java.io.InputStreamReader;
61 import java.io.IOException;
62 import java.io.Reader;
63 import java.io.StringReader;
64 import java.io.UnsupportedEncodingException;
65 import java.net.MalformedURLException;
66 import java.net.URL;
67 import java.util.ArrayList;
68 import java.util.Collections;
69 import java.util.HashSet;
70 import java.util.Iterator;
71 import java.util.LinkedHashMap;
72 import java.util.LinkedList;
73 import java.util.Map;
74 import java.util.NoSuchElementException;
75 import java.util.StringTokenizer;
77 import javax.xml.XMLConstants;
78 import javax.xml.namespace.NamespaceContext;
79 import javax.xml.namespace.QName;
80 import javax.xml.stream.Location;
81 import javax.xml.stream.XMLInputFactory;
82 import javax.xml.stream.XMLReporter;
83 import javax.xml.stream.XMLResolver;
84 import javax.xml.stream.XMLStreamConstants;
85 import javax.xml.stream.XMLStreamException;
86 import javax.xml.stream.XMLStreamReader;
88 import gnu.java.net.CRLFInputStream;
90 /**
91 * An XML parser.
92 * This parser supports the following additional StAX properties:
93 * <table>
94 * <tr><td>gnu.xml.stream.stringInterning</td>
95 * <td>Boolean</td>
96 * <td>Indicates whether markup strings will be interned</td></tr>
97 * <tr><td>gnu.xml.stream.xmlBase</td>
98 * <td>Boolean</td>
99 * <td>Indicates whether XML Base processing will be performed</td></tr>
100 * <tr><td>gnu.xml.stream.baseURI</td>
101 * <td>String</td>
102 * <td>Returns the base URI of the current event</td></tr>
103 * </table>
105 * @see http://www.w3.org/TR/REC-xml/
106 * @see http://www.w3.org/TR/xml11/
107 * @see http://www.w3.org/TR/REC-xml-names
108 * @see http://www.w3.org/TR/xml-names11
109 * @see http://www.w3.org/TR/xmlbase/
111 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
113 public class XMLParser
114 implements XMLStreamReader, NamespaceContext
117 // -- parser state machine states --
118 private static final int INIT = 0; // start state
119 private static final int PROLOG = 1; // in prolog
120 private static final int CONTENT = 2; // in content
121 private static final int EMPTY_ELEMENT = 3; // empty element state
122 private static final int MISC = 4; // in Misc (after root element)
124 // -- parameters for parsing literals --
125 private final static int LIT_ENTITY_REF = 2;
126 private final static int LIT_NORMALIZE = 4;
127 private final static int LIT_ATTRIBUTE = 8;
128 private final static int LIT_DISABLE_PE = 16;
129 private final static int LIT_DISABLE_CREF = 32;
130 private final static int LIT_DISABLE_EREF = 64;
131 private final static int LIT_PUBID = 256;
133 // -- types of attribute values --
134 final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
135 final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
136 final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
137 final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
138 final static int ATTRIBUTE_DEFAULT_FIXED = 34;
141 * The current input.
143 private Input input;
146 * Stack of inputs representing XML general entities.
147 * The input representing the XML input stream or reader is always the
148 * first element in this stack.
150 private LinkedList inputStack = new LinkedList();
153 * Stack of start-entity events to be reported.
155 private LinkedList startEntityStack = new LinkedList();
158 * Stack of end-entity events to be reported.
160 private LinkedList endEntityStack = new LinkedList();
163 * Current parser state within the main state machine.
165 private int state = INIT;
168 * The (type of the) current event.
170 private int event;
173 * Whether we are looking ahead. Used by hasNext.
175 private boolean lookahead;
178 * The element name stack. The first element in this stack will be the
179 * root element.
181 private LinkedList stack = new LinkedList();
184 * Stack of namespace contexts. These are maps specifying prefix-to-URI
185 * mappings. The first element in this stack is the most recent namespace
186 * context (i.e. the other way around from the element name stack).
188 private LinkedList namespaces = new LinkedList();
191 * The base-URI stack. This holds the base URI context for each element.
192 * The first element in this stack is the most recent context (i.e. the
193 * other way around from the element name stack).
195 private LinkedList bases = new LinkedList();
198 * The list of attributes for the current element, in the order defined in
199 * the XML stream.
201 private ArrayList attrs = new ArrayList();
204 * Buffer for text and character data.
206 private StringBuffer buf = new StringBuffer();
209 * Buffer for NMTOKEN strings (markup).
211 private StringBuffer nmtokenBuf = new StringBuffer();
214 * Buffer for string literals. (e.g. attribute values)
216 private StringBuffer literalBuf = new StringBuffer();
219 * Temporary Unicode character buffer used during character data reads.
221 private int[] tmpBuf = new int[1024];
224 * The element content model for the current element.
226 private ContentModel currentContentModel;
229 * The validation stack. This holds lists of the elements seen for each
230 * element, in order to determine whether the names and order of these
231 * elements match the content model for the element. The last entry in
232 * this stack represents the current element.
234 private LinkedList validationStack;
237 * These sets contain the IDs and the IDREFs seen in the document, to
238 * ensure that IDs are unique and that each IDREF refers to an ID in the
239 * document.
241 private HashSet ids, idrefs;
244 * The target and data associated with the current processing instruction
245 * event.
247 private String piTarget, piData;
250 * The XML version declared in the XML declaration.
252 private String xmlVersion;
255 * The encoding declared in the XML declaration.
257 private String xmlEncoding;
260 * The standalone value declared in the XML declaration.
262 private Boolean xmlStandalone;
265 * The document type definition.
267 Doctype doctype;
270 * State variables for determining parameter-entity expansion.
272 private boolean expandPE, peIsError;
275 * Whether this is a validating parser.
277 private final boolean validating;
280 * Whether strings representing markup will be interned.
282 private final boolean stringInterning;
285 * If true, CDATA sections will be merged with adjacent text nodes into a
286 * single event.
288 private final boolean coalescing;
291 * Whether to replace general entity references with their replacement
292 * text automatically during parsing.
293 * Otherwise entity-reference events will be issued.
295 private final boolean replaceERefs;
298 * Whether to support external entities.
300 private final boolean externalEntities;
303 * Whether to support DTDs.
305 private final boolean supportDTD;
308 * Whether to support XML namespaces. If true, namespace information will
309 * be available. Otherwise namespaces will simply be reported as ordinary
310 * attributes.
312 private final boolean namespaceAware;
315 * Whether to support XML Base. If true, URIs specified in xml:base
316 * attributes will be honoured when resolving external entities.
318 private final boolean baseAware;
321 * The reporter to receive parsing warnings.
323 final XMLReporter reporter;
326 * Callback interface for resolving external entities.
328 final XMLResolver resolver;
330 // -- Constants for testing the next kind of markup event --
331 private static final String TEST_START_ELEMENT = "<";
332 private static final String TEST_END_ELEMENT = "</";
333 private static final String TEST_COMMENT = "<!--";
334 private static final String TEST_PI = "<?";
335 private static final String TEST_CDATA = "<![CDATA[";
336 private static final String TEST_XML_DECL = "<?xml";
337 private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
338 private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
339 private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
340 private static final String TEST_ENTITY_DECL = "<!ENTITY";
341 private static final String TEST_NOTATION_DECL = "<!NOTATION";
342 private static final String TEST_KET = ">";
343 private static final String TEST_END_COMMENT = "--";
344 private static final String TEST_END_PI = "?>";
345 private static final String TEST_END_CDATA = "]]>";
348 * The general entities predefined by the XML specification.
350 private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
351 static
353 PREDEFINED_ENTITIES.put("amp", "&");
354 PREDEFINED_ENTITIES.put("lt", "<");
355 PREDEFINED_ENTITIES.put("gt", ">");
356 PREDEFINED_ENTITIES.put("apos", "'");
357 PREDEFINED_ENTITIES.put("quot", "\"");
/**
 * Builds a parser that reads its document from a byte stream.
 * Prefer this constructor whenever a byte stream is available, because it
 * allows the encoding of the XML data to be determined from the stream.
 * @param in the input stream
 * @param systemId the URL from which the input stream was retrieved
 * (necessary if there are external entities to be resolved)
 * @param validating if the parser is to be a validating parser
 * @param namespaceAware if the parser should support XML Namespaces
 * @param coalescing if CDATA sections should be merged into adjacent text
 * nodes
 * @param replaceERefs if entity references should be automatically
 * replaced by their replacement text (otherwise they will be reported as
 * entity-reference events)
 * @param externalEntities if external entities should be loaded
 * @param supportDTD if support for the XML DTD should be enabled
 * @param baseAware if the parser should support XML Base to resolve
 * external entities
 * @param stringInterning whether strings will be interned during parsing
 * @param reporter the reporter to receive warnings during processing
 * @param resolver the callback interface used to resolve external
 * entities
 */
public XMLParser(InputStream in, String systemId,
                 boolean validating,
                 boolean namespaceAware,
                 boolean coalescing,
                 boolean replaceERefs,
                 boolean externalEntities,
                 boolean supportDTD,
                 boolean baseAware,
                 boolean stringInterning,
                 XMLReporter reporter,
                 XMLResolver resolver)
{
  // Callbacks first, then the feature switches.
  this.reporter = reporter;
  this.resolver = resolver;
  this.stringInterning = stringInterning;
  this.baseAware = baseAware;
  this.supportDTD = supportDTD;
  this.externalEntities = externalEntities;
  this.replaceERefs = replaceERefs;
  this.coalescing = coalescing;
  this.namespaceAware = namespaceAware;
  this.validating = validating;
  // The validation structures are only allocated for validating parsers.
  if (validating)
    {
      ids = new HashSet();
      idrefs = new HashSet();
      validationStack = new LinkedList();
    }
  // The document input goes onto the input stack first.
  Input documentInput =
    new Input(in, null, null, systemId, null, null, false, true);
  pushInput(documentInput);
}
/**
 * Creates a new XML parser for the given character stream.
 * This constructor is only available for compatibility with the JAXP
 * APIs, which permit XML to be parsed from a character stream. Because
 * the encoding specified by the character stream may conflict with that
 * specified in the XML declaration, this constructor should be avoided
 * where possible.
 * @param reader the character stream
 * @param systemId the URL from which the character stream was retrieved
 * (necessary if there are external entities to be resolved)
 * @param validating if the parser is to be a validating parser
 * @param namespaceAware if the parser should support XML Namespaces
 * @param coalescing if CDATA sections should be merged into adjacent text
 * nodes
 * @param replaceERefs if entity references should be automatically
 * replaced by their replacement text (otherwise they will be reported as
 * entity-reference events)
 * @param externalEntities if external entities should be loaded
 * @param supportDTD if support for the XML DTD should be enabled
 * @param baseAware if the parser should support XML Base to resolve
 * external entities
 * @param stringInterning whether strings will be interned during parsing
 * @param reporter the reporter to receive warnings during processing
 * @param resolver the callback interface used to resolve external
 * entities
 */
public XMLParser(Reader reader, String systemId,
                 boolean validating,
                 boolean namespaceAware,
                 boolean coalescing,
                 boolean replaceERefs,
                 boolean externalEntities,
                 boolean supportDTD,
                 boolean baseAware,
                 boolean stringInterning,
                 XMLReporter reporter,
                 XMLResolver resolver)
{
  this.validating = validating;
  this.namespaceAware = namespaceAware;
  this.coalescing = coalescing;
  this.replaceERefs = replaceERefs;
  this.externalEntities = externalEntities;
  this.supportDTD = supportDTD;
  this.baseAware = baseAware;
  this.stringInterning = stringInterning;
  this.reporter = reporter;
  this.resolver = resolver;
  // The validation structures are only allocated for validating parsers.
  if (validating)
    {
      validationStack = new LinkedList();
      ids = new HashSet();
      idrefs = new HashSet();
    }
  // Same as the byte-stream constructor, but the reader slot of the
  // Input is filled instead of the stream slot.
  pushInput(new Input(null, reader, null, systemId, null, null, false, true));
}
471 // -- NamespaceContext --
473 public String getNamespaceURI(String prefix)
475 if (XMLConstants.XML_NS_PREFIX.equals(prefix))
476 return XMLConstants.XML_NS_URI;
477 if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
478 return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
479 for (Iterator i = namespaces.iterator(); i.hasNext(); )
481 LinkedHashMap ctx = (LinkedHashMap) i.next();
482 String namespaceURI = (String) ctx.get(prefix);
483 if (namespaceURI != null)
484 return namespaceURI;
486 return null;
489 public String getPrefix(String namespaceURI)
491 if (XMLConstants.XML_NS_URI.equals(namespaceURI))
492 return XMLConstants.XML_NS_PREFIX;
493 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
494 return XMLConstants.XMLNS_ATTRIBUTE;
495 for (Iterator i = namespaces.iterator(); i.hasNext(); )
497 LinkedHashMap ctx = (LinkedHashMap) i.next();
498 if (ctx.containsValue(namespaceURI))
500 for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
502 Map.Entry entry = (Map.Entry) i.next();
503 String uri = (String) entry.getValue();
504 if (uri.equals(namespaceURI))
505 return (String) entry.getKey();
509 return null;
512 public Iterator getPrefixes(String namespaceURI)
514 if (XMLConstants.XML_NS_URI.equals(namespaceURI))
515 return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
516 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
517 return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
518 LinkedList acc = new LinkedList();
519 for (Iterator i = namespaces.iterator(); i.hasNext(); )
521 LinkedHashMap ctx = (LinkedHashMap) i.next();
522 if (ctx.containsValue(namespaceURI))
524 for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
526 Map.Entry entry = (Map.Entry) i.next();
527 String uri = (String) entry.getValue();
528 if (uri.equals(namespaceURI))
529 acc.add(entry.getKey());
533 return acc.iterator();
536 // -- XMLStreamReader --
538 public void close()
539 throws XMLStreamException
541 stack = null;
542 namespaces = null;
543 bases = null;
544 buf = null;
545 attrs = null;
546 doctype = null;
548 inputStack = null;
549 validationStack = null;
550 ids = null;
551 idrefs = null;
554 public NamespaceContext getNamespaceContext()
556 return this;
559 public int getAttributeCount()
561 return attrs.size();
564 public String getAttributeName(int index)
566 Attribute a = (Attribute) attrs.get(index);
567 return a.localName;
570 public String getAttributeNamespace(int index)
572 String prefix = getAttributePrefix(index);
573 return getNamespaceURI(prefix);
576 public String getAttributePrefix(int index)
578 Attribute a = (Attribute) attrs.get(index);
579 return a.prefix;
582 public QName getAttributeQName(int index)
584 Attribute a = (Attribute) attrs.get(index);
585 String namespaceURI = getNamespaceURI(a.prefix);
586 return new QName(namespaceURI, a.localName, a.prefix);
589 public String getAttributeType(int index)
591 Attribute a = (Attribute) attrs.get(index);
592 return a.type;
595 private String getAttributeType(String elementName, String attName)
597 if (doctype != null)
599 AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
600 if (att != null)
601 return att.type;
603 return "CDATA";
606 public String getAttributeValue(int index)
608 Attribute a = (Attribute) attrs.get(index);
609 return a.value;
612 public String getAttributeValue(String namespaceURI, String localName)
614 for (Iterator i = attrs.iterator(); i.hasNext(); )
616 Attribute a = (Attribute) i.next();
617 if (a.localName.equals(localName))
619 String uri = getNamespaceURI(a.prefix);
620 if ((uri == null && namespaceURI == null) ||
621 (uri != null && uri.equals(namespaceURI)))
622 return a.value;
625 return null;
628 boolean isAttributeDeclared(int index)
630 if (doctype == null)
631 return false;
632 Attribute a = (Attribute) attrs.get(index);
633 String qn = ("".equals(a.prefix)) ? a.localName :
634 a.prefix + ":" + a.localName;
635 String elementName = buf.toString();
636 return doctype.isAttributeDeclared(elementName, qn);
639 public String getCharacterEncodingScheme()
641 return xmlEncoding;
644 public String getElementText()
645 throws XMLStreamException
647 if (event != XMLStreamConstants.START_ELEMENT)
648 throw new XMLStreamException("current event must be START_ELEMENT");
649 StringBuffer elementText = new StringBuffer();
650 int depth = stack.size();
651 while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
653 switch (next())
655 case XMLStreamConstants.CHARACTERS:
656 case XMLStreamConstants.SPACE:
657 elementText.append(buf.toString());
660 return elementText.toString();
663 public String getEncoding()
665 return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
668 public int getEventType()
670 return event;
673 public String getLocalName()
675 switch (event)
677 case XMLStreamConstants.START_ELEMENT:
678 case XMLStreamConstants.END_ELEMENT:
679 String qName = buf.toString();
680 int ci = qName.indexOf(':');
681 return (ci == -1) ? qName : qName.substring(ci + 1);
682 default:
683 return null;
687 public Location getLocation()
689 return input;
692 public QName getName()
694 switch (event)
696 case XMLStreamConstants.START_ELEMENT:
697 case XMLStreamConstants.END_ELEMENT:
698 String qName = buf.toString();
699 int ci = qName.indexOf(':');
700 String localName = (ci == -1) ? qName : qName.substring(ci + 1);
701 String prefix = (ci == -1) ?
702 (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
703 qName.substring(0, ci);
704 String namespaceURI = getNamespaceURI(prefix);
705 return new QName(namespaceURI, localName, prefix);
706 default:
707 return null;
711 public int getNamespaceCount()
713 if (!namespaceAware)
714 return 0;
715 switch (event)
717 case XMLStreamConstants.START_ELEMENT:
718 case XMLStreamConstants.END_ELEMENT:
719 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
720 return ctx.size();
721 default:
722 return 0;
726 public String getNamespacePrefix(int index)
728 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
729 int count = 0;
730 for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
732 String prefix = (String) i.next();
733 if (count++ == index)
734 return prefix;
736 return null;
739 public String getNamespaceURI()
741 switch (event)
743 case XMLStreamConstants.START_ELEMENT:
744 case XMLStreamConstants.END_ELEMENT:
745 String qName = buf.toString();
746 int ci = qName.indexOf(':');
747 if (ci == -1)
748 return null;
749 String prefix = qName.substring(0, ci);
750 return getNamespaceURI(prefix);
751 default:
752 return null;
756 public String getNamespaceURI(int index)
758 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
759 int count = 0;
760 for (Iterator i = ctx.values().iterator(); i.hasNext(); )
762 String uri = (String) i.next();
763 if (count++ == index)
764 return uri;
766 return null;
769 public String getPIData()
771 return piData;
774 public String getPITarget()
776 return piTarget;
779 public String getPrefix()
781 switch (event)
783 case XMLStreamConstants.START_ELEMENT:
784 case XMLStreamConstants.END_ELEMENT:
785 String qName = buf.toString();
786 int ci = qName.indexOf(':');
787 return (ci == -1) ?
788 (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
789 qName.substring(0, ci);
790 default:
791 return null;
795 public Object getProperty(String name)
796 throws IllegalArgumentException
798 if (name == null)
799 throw new IllegalArgumentException("name is null");
800 if (XMLInputFactory.ALLOCATOR.equals(name))
801 return null;
802 if (XMLInputFactory.IS_COALESCING.equals(name))
803 return coalescing ? Boolean.TRUE : Boolean.FALSE;
804 if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
805 return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
806 if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
807 return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
808 if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
809 return externalEntities ? Boolean.TRUE : Boolean.FALSE;
810 if (XMLInputFactory.IS_VALIDATING.equals(name))
811 return Boolean.FALSE;
812 if (XMLInputFactory.REPORTER.equals(name))
813 return reporter;
814 if (XMLInputFactory.RESOLVER.equals(name))
815 return resolver;
816 if (XMLInputFactory.SUPPORT_DTD.equals(name))
817 return supportDTD ? Boolean.TRUE : Boolean.FALSE;
818 if ("gnu.xml.stream.stringInterning".equals(name))
819 return stringInterning ? Boolean.TRUE : Boolean.FALSE;
820 if ("gnu.xml.stream.xmlBase".equals(name))
821 return baseAware ? Boolean.TRUE : Boolean.FALSE;
822 if ("gnu.xml.stream.baseURI".equals(name))
823 return getXMLBase();
824 return null;
827 public String getText()
829 return buf.toString();
832 public char[] getTextCharacters()
834 return buf.toString().toCharArray();
837 public int getTextCharacters(int sourceStart, char[] target,
838 int targetStart, int length)
839 throws XMLStreamException
841 length = Math.min(sourceStart + buf.length(), length);
842 int sourceEnd = sourceStart + length;
843 buf.getChars(sourceStart, sourceEnd, target, targetStart);
844 return length;
847 public int getTextLength()
849 return buf.length();
852 public int getTextStart()
854 return 0;
857 public String getVersion()
859 return (xmlVersion == null) ? "1.0" : xmlVersion;
862 public boolean hasName()
864 switch (event)
866 case XMLStreamConstants.START_ELEMENT:
867 case XMLStreamConstants.END_ELEMENT:
868 return true;
869 default:
870 return false;
874 public boolean hasText()
876 switch (event)
878 case XMLStreamConstants.CHARACTERS:
879 case XMLStreamConstants.SPACE:
880 return true;
881 default:
882 return false;
886 public boolean isAttributeSpecified(int index)
888 Attribute a = (Attribute) attrs.get(index);
889 return a.specified;
892 public boolean isCharacters()
894 return (event == XMLStreamConstants.CHARACTERS);
897 public boolean isEndElement()
899 return (event == XMLStreamConstants.END_ELEMENT);
902 public boolean isStandalone()
904 return Boolean.TRUE.equals(xmlStandalone);
907 public boolean isStartElement()
909 return (event == XMLStreamConstants.START_ELEMENT);
912 public boolean isWhiteSpace()
914 return (event == XMLStreamConstants.SPACE);
917 public int nextTag()
918 throws XMLStreamException
922 switch (next())
924 case XMLStreamConstants.START_ELEMENT:
925 case XMLStreamConstants.END_ELEMENT:
926 case XMLStreamConstants.CHARACTERS:
927 case XMLStreamConstants.SPACE:
928 case XMLStreamConstants.COMMENT:
929 case XMLStreamConstants.PROCESSING_INSTRUCTION:
930 break;
931 default:
932 throw new XMLStreamException("Unexpected event type: " + event);
935 while (event != XMLStreamConstants.START_ELEMENT &&
936 event != XMLStreamConstants.END_ELEMENT);
937 return event;
940 public void require(int type, String namespaceURI, String localName)
941 throws XMLStreamException
943 if (event != type)
944 throw new XMLStreamException("Current event type is " + event);
945 if (event == XMLStreamConstants.START_ELEMENT ||
946 event == XMLStreamConstants.END_ELEMENT)
948 String ln = getLocalName();
949 if (!ln.equals(localName))
950 throw new XMLStreamException("Current local-name is " + ln);
951 String uri = getNamespaceURI();
952 if ((uri == null && namespaceURI != null) ||
953 (uri != null && !uri.equals(namespaceURI)))
954 throw new XMLStreamException("Current namespace URI is " + uri);
958 public boolean standaloneSet()
960 return (xmlStandalone != null);
963 public boolean hasNext()
964 throws XMLStreamException
966 if (event == XMLStreamConstants.END_DOCUMENT)
967 return false;
968 if (!lookahead)
970 next();
971 lookahead = true;
973 return event != -1;
// Advances the parser by one event and returns its type; the same value
// is left in the event field. An IOException raised while reading is
// rethrown as an XMLStreamException that carries it as its cause.
976 public int next()
977 throws XMLStreamException
// hasNext() has already fetched this event: hand it out without reading.
979 if (lookahead)
981 lookahead = false;
982 return event;
// Leaving an element: discard the contexts that were in force for it.
984 if (event == XMLStreamConstants.END_ELEMENT)
986 // Pop namespace context
987 if (namespaceAware)
988 namespaces.removeFirst();
989 // Pop base context
990 if (baseAware)
991 bases.removeFirst();
// Queued start-entity, then end-entity, notifications are delivered
// before any further input is read; buf carries the entity name.
993 if (!startEntityStack.isEmpty())
995 String entityName = (String) startEntityStack.removeFirst();
996 buf.setLength(0);
997 buf.append(entityName);
998 event = XMLStreamConstants.START_ENTITY;
999 return event;
1001 else if (!endEntityStack.isEmpty())
1003 String entityName = (String) endEntityStack.removeFirst();
1004 buf.setLength(0);
1005 buf.append(entityName);
1006 event = XMLStreamConstants.END_ENTITY;
1007 return event;
// The current input is initialised on first use.
1011 if (!input.initialized)
1012 input.init();
// Dispatch on the state of the main state machine.
1013 switch (state)
// The longer markup prefixes are tested before the bare "<" that they
// all begin with.
1015 case CONTENT:
1016 if (tryRead(TEST_END_ELEMENT))
1018 readEndElement();
// Closing the root element moves the parser to the trailing Misc state.
1019 if (stack.isEmpty())
1020 state = MISC;
1021 event = XMLStreamConstants.END_ELEMENT;
1023 else if (tryRead(TEST_COMMENT))
1025 readComment(false);
1026 event = XMLStreamConstants.COMMENT;
1028 else if (tryRead(TEST_PI))
1030 readPI(false);
1031 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1033 else if (tryRead(TEST_CDATA))
1035 readCDSect();
1036 event = XMLStreamConstants.CDATA;
1038 else if (tryRead(TEST_START_ELEMENT))
1040 state = readStartElement();
1041 event = XMLStreamConstants.START_ELEMENT;
1043 else
1045 // Check for character reference or predefined entity
1046 mark(8);
1047 int c = readCh();
1048 if (c == 0x26) // '&'
1050 c = readCh();
1051 if (c == 0x23) // '#'
// "&#": rewind and let readCharData consume the character reference.
1053 reset();
1054 event = readCharData(null);
1056 else
1058 // entity reference
1059 reset();
1060 readCh(); // &
1061 readReference();
1062 String ref = buf.toString();
1063 String text = (String) PREDEFINED_ENTITIES.get(ref);
// A predefined entity is passed to readCharData as its replacement text.
1064 if (text != null)
1066 event = readCharData(text);
1068 else if (replaceERefs && !isUnparsedEntity(ref))
1070 // this will report a start-entity event
1071 boolean external = false;
1072 if (doctype != null)
1074 Object entity = doctype.getEntity(ref);
1075 if (entity instanceof ExternalIds)
1076 external = true;
1078 expandEntity(ref, false, external);
// Re-enter next() to produce the event that follows the expansion.
1079 event = next();
1081 else
1083 event = XMLStreamConstants.ENTITY_REFERENCE;
1087 else
// Not a reference: rewind and read ordinary character data.
1089 reset();
1090 event = readCharData(null);
1091 if (validating && doctype != null)
1092 validatePCData(buf.toString());
1095 break;
// Second half of an empty-element tag: report END_ELEMENT for the name
// popped off the element stack, without reading any input.
1096 case EMPTY_ELEMENT:
1097 String elementName = (String) stack.removeLast();
1098 buf.setLength(0);
1099 buf.append(elementName);
1100 state = stack.isEmpty() ? MISC : CONTENT;
1101 event = XMLStreamConstants.END_ELEMENT;
1102 if (validating && doctype != null)
1103 endElementValidationHook();
1104 break;
1105 case INIT: // XMLDecl?
1106 if (tryRead(TEST_XML_DECL))
1107 readXMLDecl();
1108 input.finalizeEncoding();
1109 event = XMLStreamConstants.START_DOCUMENT;
1110 state = PROLOG;
1111 break;
1112 case PROLOG: // Misc* (doctypedecl Misc*)?
1113 skipWhitespace();
// A DOCTYPE declaration is only looked for while no doctype is set.
1114 if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
1116 readDoctypeDecl();
1117 event = XMLStreamConstants.DTD;
1119 else if (tryRead(TEST_COMMENT))
1121 readComment(false);
1122 event = XMLStreamConstants.COMMENT;
1124 else if (tryRead(TEST_PI))
1126 readPI(false);
1127 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1129 else if (tryRead(TEST_START_ELEMENT))
1131 state = readStartElement();
1132 event = XMLStreamConstants.START_ELEMENT;
1134 else
1136 int c = readCh();
1137 error("no root element: U+" + Integer.toHexString(c));
1139 break;
1140 case MISC: // Comment | PI | S
1141 skipWhitespace();
1142 if (tryRead(TEST_COMMENT))
1144 readComment(false);
1145 event = XMLStreamConstants.COMMENT;
1147 else if (tryRead(TEST_PI))
1149 readPI(false);
1150 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1152 else
// Calling next() again after END_DOCUMENT has been reported is an error.
1154 if (event == XMLStreamConstants.END_DOCUMENT)
1155 throw new NoSuchElementException();
// Anything other than -1 from readCh() here is reported as an error.
1156 int c = readCh();
1157 if (c != -1)
1158 error("Only comments and PIs may appear after " +
1159 "the root element");
1160 event = XMLStreamConstants.END_DOCUMENT;
1162 break;
// Unknown state: -1 is the value hasNext() treats as "no event".
1163 default:
1164 event = -1;
1166 return event;
1168 catch (IOException e)
1170 XMLStreamException e2 = new XMLStreamException();
1171 e2.initCause(e);
1172 throw e2;
1176 // package private
1179 * Returns the current element name.
1181 String getCurrentElement()
1183 return (String) stack.getLast();
1186 // private
1188 private void mark(int limit)
1189 throws IOException
1191 input.mark(limit);
1194 private void reset()
1195 throws IOException
1197 input.reset();
1200 private int read()
1201 throws IOException
1203 return input.read();
1206 private int read(int[] b, int off, int len)
1207 throws IOException
1209 return input.read(b, off, len);
1213 * Parsed character read.
1215 private int readCh()
1216 throws IOException, XMLStreamException
1218 int c = read();
1219 if (expandPE && c == 0x25) // '%'
1221 if (peIsError)
1222 error("PE reference within decl in internal subset.");
1223 expandPEReference();
1224 return readCh();
1226 return c;
1230 * Reads the next character, ensuring it is the character specified.
1231 * @param delim the character to match
1232 * @exception XMLStreamException if the next character is not the
1233 * specified one
1235 private void require(char delim)
1236 throws IOException, XMLStreamException
1238 mark(1);
1239 int c = readCh();
1240 if (delim != c)
1242 reset();
1243 error("required character (got U+" + Integer.toHexString(c) + ")",
1244 new Character(delim));
1249 * Reads the next few characters, ensuring they match the string specified.
1250 * @param delim the string to match
1251 * @exception XMLStreamException if the next characters do not match the
1252 * specified string
1254 private void require(String delim)
1255 throws IOException, XMLStreamException
1257 char[] chars = delim.toCharArray();
1258 int len = chars.length;
1259 mark(len);
1260 int off = 0;
1263 int l2 = read(tmpBuf, off, len - off);
1264 if (l2 == -1)
1266 reset();
1267 error("EOF before required string", delim);
1269 off += l2;
1271 while (off < len);
1272 for (int i = 0; i < chars.length; i++)
1274 if (chars[i] != tmpBuf[i])
1276 reset();
1277 error("required string", delim);
1283 * Try to read a single character. On failure, reset the stream.
1284 * @param delim the character to test
1285 * @return true if the character matched delim, false otherwise.
1287 private boolean tryRead(char delim)
1288 throws IOException, XMLStreamException
1290 mark(1);
1291 int c = readCh();
1292 if (delim != c)
1294 reset();
1295 return false;
1297 return true;
1301 * Tries to read the specified characters.
1302 * If successful, the stream is positioned after the last character,
1303 * otherwise it is reset.
1304 * @param test the string to test
1305 * @return true if the characters matched the test string, false otherwise.
1307 private boolean tryRead(String test)
1308 throws IOException
1310 char[] chars = test.toCharArray();
1311 int len = chars.length;
1312 mark(len);
1313 int count = 0;
1314 int l2 = read(tmpBuf, 0, len);
1315 if (l2 == -1)
1317 reset();
1318 return false;
1320 count += l2;
1321 while (count < len)
1323 // force read
1324 int c = read();
1325 if (c == -1)
1327 reset();
1328 return false;
1330 tmpBuf[count++] = (char) c;
1332 for (int i = 0; i < len; i++)
1334 if (chars[i] != tmpBuf[i])
1336 reset();
1337 return false;
1340 return true;
1344 * Reads characters until the specified test string is encountered.
1345 * @param delim the string delimiting the end of the characters
1347 private void readUntil(String delim)
1348 throws IOException, XMLStreamException
1350 int startLine = input.line;
1353 while (!tryRead(delim))
1355 int c = readCh();
1356 if (c == -1)
1357 throw new EOFException();
1358 else if (input.xml11)
1360 if (!isXML11Char(c) || isXML11RestrictedChar(c))
1361 error("illegal XML 1.1 character",
1362 "U+" + Integer.toHexString(c));
1364 else if (!isChar(c))
1365 error("illegal XML character",
1366 "U+" + Integer.toHexString(c));
1367 buf.append(Character.toChars(c));
1370 catch (EOFException e)
1372 error("end of input while looking for delimiter "+
1373 "(started on line " + startLine + ')', delim);
1378 * Reads any whitespace characters.
1379 * @return true if whitespace characters were read, false otherwise
1381 private boolean tryWhitespace()
1382 throws IOException, XMLStreamException
1384 boolean white;
1385 boolean ret = false;
1388 mark(1);
1389 int c = readCh();
1390 while (c == -1 && inputStack.size() > 1)
1392 popInput();
1393 c = readCh();
1395 white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1396 if (white)
1397 ret = true;
1399 while (white);
1400 reset();
1401 return ret;
1405 * Skip over any whitespace characters.
1407 private void skipWhitespace()
1408 throws IOException, XMLStreamException
1410 boolean white;
1413 mark(1);
1414 int c = readCh();
1415 while (c == -1 && inputStack.size() > 1)
1417 popInput();
1418 c = readCh();
1420 white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1422 while (white);
1423 reset();
1427 * Try to read as many whitespace characters as are available.
1428 * @exception XMLStreamException if no whitespace characters were seen
1430 private void requireWhitespace()
1431 throws IOException, XMLStreamException
1433 if (!tryWhitespace())
1434 error("whitespace required");
1438 * Returns the current base URI for resolving external entities.
1440 String getXMLBase()
1442 if (baseAware)
1444 for (Iterator i = bases.iterator(); i.hasNext(); )
1446 String base = (String) i.next();
1447 if (base != null)
1448 return base;
1451 return input.systemId;
1455 * Push the specified text input source.
1457 private void pushInput(String name, String text, boolean report,
1458 boolean normalize)
1459 throws IOException, XMLStreamException
1461 // Check for recursion
1462 if (name != null && !"".equals(name))
1464 for (Iterator i = inputStack.iterator(); i.hasNext(); )
1466 Input ctx = (Input) i.next();
1467 if (name.equals(ctx.name))
1468 error("entities may not be self-recursive", name);
1471 else
1472 report = false;
1473 pushInput(new Input(null, new StringReader(text), input.publicId,
1474 input.systemId, name, input.inputEncoding, report,
1475 normalize));
1479 * Push the specified external input source.
1481 private void pushInput(String name, ExternalIds ids, boolean report,
1482 boolean normalize)
1483 throws IOException, XMLStreamException
1485 if (!externalEntities)
1486 return;
1487 InputStream in = null;
1488 String url = absolutize(input.systemId, ids.systemId);
1489 // Check for recursion
1490 for (Iterator i = inputStack.iterator(); i.hasNext(); )
1492 Input ctx = (Input) i.next();
1493 if (url.equals(ctx.systemId))
1494 error("entities may not be self-recursive", url);
1495 if (name != null && !"".equals(name) && name.equals(ctx.name))
1496 error("entities may not be self-recursive", name);
1498 if (name == null || "".equals(name))
1499 report = false;
1500 if (in == null && url != null && resolver != null)
1502 if (resolver instanceof XMLResolver2)
1503 in = ((XMLResolver2) resolver).resolve(ids.publicId, url);
1504 else
1505 in = resolver.resolve(url);
1507 if (in == null)
1508 in = resolve(url);
1509 if (in == null)
1510 error("unable to resolve external entity",
1511 (ids.systemId != null) ? ids.systemId : ids.publicId);
1512 pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1513 normalize));
1514 input.init();
1515 if (tryRead(TEST_XML_DECL))
1516 readTextDecl();
1517 input.finalizeEncoding();
1521 * Push the specified input source (general entity) onto the input stack.
1523 private void pushInput(Input input)
1525 if (input.report)
1526 startEntityStack.addFirst(input.name);
1527 inputStack.addLast(input);
1528 if (this.input != null)
1529 input.xml11 = this.input.xml11;
1530 this.input = input;
1534 * "Absolutize" a URL. This resolves a relative URL into an absolute one.
1535 * @param base the current base URL
1536 * @param href the (absolute or relative) URL to resolve
1538 static String absolutize(String base, String href)
1540 if (href == null)
1541 return null;
1542 int ci = href.indexOf(':');
1543 if (ci > 1 && isLowercaseAscii(href.substring(0, ci)))
1545 // href is absolute already
1546 return href;
1548 if (base == null)
1549 base = "";
1550 else
1552 int i = base.lastIndexOf('/');
1553 if (i != -1)
1554 base = base.substring(0, i + 1);
1555 else
1556 base = "";
1558 if ("".equals(base))
1560 // assume file URL relative to current directory
1561 base = System.getProperty("user.dir");
1562 if (base.charAt(0) == '/')
1563 base = base.substring(1);
1564 base = "file:///" + base.replace(File.separatorChar, '/');
1565 if (!base.endsWith("/"))
1566 base += "/";
1568 if (href.startsWith("/"))
1570 if (base.startsWith("file:"))
1571 return "file://" + href;
1572 int i = base.indexOf("://");
1573 if (i != -1)
1575 i = base.indexOf('/', i + 3);
1576 if (i != -1)
1577 base = base.substring(0, i);
1580 else
1582 while (href.startsWith(".."))
1584 int i = base.lastIndexOf('/', base.length() - 2);
1585 if (i != -1)
1586 base = base.substring(0, i + 1);
1587 href = href.substring(2);
1588 if (href.startsWith("/"))
1589 href = href.substring(1);
1592 return base + href;
1595 private static boolean isLowercaseAscii(String text)
1597 int len = text.length();
1598 for (int i = 0; i < len; i++)
1600 char c = text.charAt(i);
1601 if (c < 97 || c > 122)
1602 return false;
1604 return true;
1608 * Returns an input stream for the given URL.
1610 private InputStream resolve(String url)
1611 throws IOException
1615 return new URL(url).openStream();
1617 catch (MalformedURLException e)
1619 return null;
1624 * Pops the current input source (general entity) off the stack.
1626 private void popInput()
1628 Input old = (Input) inputStack.removeLast();
1629 if (old.report)
1630 endEntityStack.addFirst(old.name);
1631 input = (Input) inputStack.getLast();
1635 * Parse an entity text declaration.
1637 private void readTextDecl()
1638 throws IOException, XMLStreamException
1640 final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1641 requireWhitespace();
1642 if (tryRead("version"))
1644 readEq();
1645 String v = readLiteral(flags, false);
1646 if ("1.0".equals(v))
1647 input.xml11 = false;
1648 else if ("1.1".equals(v))
1650 Input i1 = (Input) inputStack.getFirst();
1651 if (!i1.xml11)
1652 error("external entity specifies later version number");
1653 input.xml11 = true;
1655 else
1656 throw new XMLStreamException("illegal XML version: " + v);
1657 requireWhitespace();
1659 require("encoding");
1660 readEq();
1661 String enc = readLiteral(flags, false);
1662 skipWhitespace();
1663 require("?>");
1664 input.setInputEncoding(enc);
1668 * Parse the XML declaration.
1670 private void readXMLDecl()
1671 throws IOException, XMLStreamException
1673 final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1675 requireWhitespace();
1676 require("version");
1677 readEq();
1678 xmlVersion = readLiteral(flags, false);
1679 if ("1.0".equals(xmlVersion))
1680 input.xml11 = false;
1681 else if ("1.1".equals(xmlVersion))
1682 input.xml11 = true;
1683 else
1684 throw new XMLStreamException("illegal XML version: " + xmlVersion);
1686 boolean white = tryWhitespace();
1688 if (tryRead("encoding"))
1690 if (!white)
1691 error("whitespace required before 'encoding='");
1692 readEq();
1693 xmlEncoding = readLiteral(flags, false);
1694 white = tryWhitespace();
1697 if (tryRead("standalone"))
1699 if (!white)
1700 error("whitespace required before 'standalone='");
1701 readEq();
1702 String standalone = readLiteral(flags, false);
1703 if ("yes".equals(standalone))
1704 xmlStandalone = Boolean.TRUE;
1705 else if ("no".equals(standalone))
1706 xmlStandalone = Boolean.FALSE;
1707 else
1708 error("standalone flag must be 'yes' or 'no'", standalone);
1711 skipWhitespace();
1712 require("?>");
1713 if (xmlEncoding != null)
1714 input.setInputEncoding(xmlEncoding);
1718 * Parse the DOCTYPE declaration.
1720 private void readDoctypeDecl()
1721 throws IOException, XMLStreamException
1723 if (!supportDTD)
1724 error("parser was configured not to support DTDs");
1725 requireWhitespace();
1726 String rootName = readNmtoken(true);
1727 skipWhitespace();
1728 ExternalIds ids = readExternalIds(false, true);
1729 doctype =
1730 this.new Doctype(rootName, ids.publicId, ids.systemId);
1732 // Parse internal subset first
1733 skipWhitespace();
1734 if (tryRead('['))
1736 while (true)
1738 expandPE = true;
1739 skipWhitespace();
1740 expandPE = false;
1741 if (tryRead(']'))
1742 break;
1743 else
1744 readMarkupdecl(false);
1747 skipWhitespace();
1748 require('>');
1750 // Parse external subset
1751 if (ids.systemId != null && externalEntities)
1753 pushInput("", ">", false, false);
1754 pushInput("[dtd]", ids, true, true);
1755 // loop until we get back to ">"
1756 while (true)
1758 expandPE = true;
1759 skipWhitespace();
1760 expandPE = false;
1761 mark(1);
1762 int c = readCh();
1763 if (c == 0x3e) // '>'
1764 break;
1765 else if (c == -1)
1766 popInput();
1767 else
1769 reset();
1770 expandPE = true;
1771 readMarkupdecl(true);
1772 expandPE = true;
1775 if (inputStack.size() != 2)
1776 error("external subset has unmatched '>'");
1777 popInput();
1779 checkDoctype();
1780 if (validating)
1781 validateDoctype();
1783 // Make rootName available for reading
1784 buf.setLength(0);
1785 buf.append(rootName);
1789 * Checks the well-formedness of the DTD.
1791 private void checkDoctype()
1792 throws XMLStreamException
1794 // TODO check entity recursion
1798 * Parse the markupdecl production.
1800 private void readMarkupdecl(boolean inExternalSubset)
1801 throws IOException, XMLStreamException
1803 boolean saved = expandPE;
1804 mark(1);
1805 require('<');
1806 reset();
1807 expandPE = false;
1808 if (tryRead(TEST_ELEMENT_DECL))
1810 expandPE = saved;
1811 readElementDecl();
1813 else if (tryRead(TEST_ATTLIST_DECL))
1815 expandPE = saved;
1816 readAttlistDecl();
1818 else if (tryRead(TEST_ENTITY_DECL))
1820 expandPE = saved;
1821 readEntityDecl(inExternalSubset);
1823 else if (tryRead(TEST_NOTATION_DECL))
1825 expandPE = saved;
1826 readNotationDecl(inExternalSubset);
1828 else if (tryRead(TEST_PI))
1830 readPI(true);
1831 expandPE = saved;
1833 else if (tryRead(TEST_COMMENT))
1835 readComment(true);
1836 expandPE = saved;
1838 else if (tryRead("<!["))
1840 // conditional section
1841 expandPE = saved;
1842 if (inputStack.size() < 2)
1843 error("conditional sections illegal in internal subset");
1844 skipWhitespace();
1845 if (tryRead("INCLUDE"))
1847 skipWhitespace();
1848 require('[');
1849 skipWhitespace();
1850 while (!tryRead("]]>"))
1852 readMarkupdecl(inExternalSubset);
1853 skipWhitespace();
1856 else if (tryRead("IGNORE"))
1858 skipWhitespace();
1859 require('[');
1860 expandPE = false;
1861 for (int nesting = 1; nesting > 0; )
1863 int c = readCh();
1864 switch (c)
1866 case 0x3c: // '<'
1867 if (tryRead("!["))
1868 nesting++;
1869 break;
1870 case 0x5d: // ']'
1871 if (tryRead("]>"))
1872 nesting--;
1873 break;
1874 case -1:
1875 throw new EOFException();
1878 expandPE = saved;
1880 else
1881 error("conditional section must begin with INCLUDE or IGNORE");
1883 else
1884 error("expected markup declaration");
1888 * Parse the elementdecl production.
1890 private void readElementDecl()
1891 throws IOException, XMLStreamException
1893 requireWhitespace();
1894 String name = readNmtoken(true);
1895 requireWhitespace();
1896 readContentspec(name);
1897 skipWhitespace();
1898 require('>');
1902 * Parse the contentspec production.
1904 private void readContentspec(String elementName)
1905 throws IOException, XMLStreamException
1907 if (tryRead("EMPTY"))
1908 doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
1909 else if (tryRead("ANY"))
1910 doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
1911 else
1913 ContentModel model;
1914 StringBuffer acc = new StringBuffer();
1915 require('(');
1916 acc.append('(');
1917 skipWhitespace();
1918 if (tryRead("#PCDATA"))
1920 // mixed content
1921 acc.append("#PCDATA");
1922 MixedContentModel mm = new MixedContentModel();
1923 model = mm;
1924 skipWhitespace();
1925 if (tryRead(')'))
1927 acc.append(")");
1928 if (tryRead('*'))
1930 mm.min = 0;
1931 mm.max = -1;
1934 else
1936 while (!tryRead(")"))
1938 require('|');
1939 acc.append('|');
1940 skipWhitespace();
1941 String name = readNmtoken(true);
1942 acc.append(name);
1943 mm.addName(name);
1944 skipWhitespace();
1946 require('*');
1947 acc.append(")*");
1948 mm.min = 0;
1949 mm.max = -1;
1952 else
1953 model = readElements(acc);
1954 doctype.addElementDecl(elementName, acc.toString(), model);
1959 * Parses an element content model.
1961 private ElementContentModel readElements(StringBuffer acc)
1962 throws IOException, XMLStreamException
1964 int separator;
1965 ElementContentModel model = new ElementContentModel();
1967 // Parse first content particle
1968 skipWhitespace();
1969 model.addContentParticle(readContentParticle(acc));
1970 // End or separator
1971 skipWhitespace();
1972 int c = readCh();
1973 switch (c)
1975 case 0x29: // ')'
1976 acc.append(')');
1977 mark(1);
1978 c = readCh();
1979 switch (c)
1981 case 0x3f: // '?'
1982 acc.append('?');
1983 model.min = 0;
1984 model.max = 1;
1985 break;
1986 case 0x2a: // '*'
1987 acc.append('*');
1988 model.min = 0;
1989 model.max = -1;
1990 break;
1991 case 0x2b: // '+'
1992 acc.append('+');
1993 model.min = 1;
1994 model.max = -1;
1995 break;
1996 default:
1997 reset();
1999 return model; // done
2000 case 0x7c: // '|'
2001 model.or = true;
2002 // fall through
2003 case 0x2c: // ','
2004 separator = c;
2005 acc.append(Character.toChars(c));
2006 break;
2007 default:
2008 error("bad separator in content model",
2009 "U+" + Integer.toHexString(c));
2010 return model;
2012 // Parse subsequent content particles
2013 while (true)
2015 skipWhitespace();
2016 model.addContentParticle(readContentParticle(acc));
2017 skipWhitespace();
2018 c = readCh();
2019 if (c == 0x29) // ')'
2021 acc.append(')');
2022 break;
2024 else if (c != separator)
2026 error("bad separator in content model",
2027 "U+" + Integer.toHexString(c));
2028 return model;
2030 else
2031 acc.append(c);
2033 // Check for occurrence indicator
2034 mark(1);
2035 c = readCh();
2036 switch (c)
2038 case 0x3f: // '?'
2039 acc.append('?');
2040 model.min = 0;
2041 model.max = 1;
2042 break;
2043 case 0x2a: // '*'
2044 acc.append('*');
2045 model.min = 0;
2046 model.max = -1;
2047 break;
2048 case 0x2b: // '+'
2049 acc.append('+');
2050 model.min = 1;
2051 model.max = -1;
2052 break;
2053 default:
2054 reset();
2056 return model;
2060 * Parse a cp production.
2062 private ContentParticle readContentParticle(StringBuffer acc)
2063 throws IOException, XMLStreamException
2065 ContentParticle cp = new ContentParticle();
2066 if (tryRead('('))
2068 acc.append('(');
2069 cp.content = readElements(acc);
2071 else
2073 String name = readNmtoken(true);
2074 acc.append(name);
2075 cp.content = name;
2076 mark(1);
2077 int c = readCh();
2078 switch (c)
2080 case 0x3f: // '?'
2081 acc.append('?');
2082 cp.min = 0;
2083 cp.max = 1;
2084 break;
2085 case 0x2a: // '*'
2086 acc.append('*');
2087 cp.min = 0;
2088 cp.max = -1;
2089 break;
2090 case 0x2b: // '+'
2091 acc.append('+');
2092 cp.min = 1;
2093 cp.max = -1;
2094 break;
2095 default:
2096 reset();
2099 return cp;
2103 * Parse an attribute-list definition.
2105 private void readAttlistDecl()
2106 throws IOException, XMLStreamException
2108 requireWhitespace();
2109 String elementName = readNmtoken(true);
2110 boolean white = tryWhitespace();
2111 while (!tryRead('>'))
2113 if (!white)
2114 error("whitespace required before attribute definition");
2115 readAttDef(elementName);
2116 white = tryWhitespace();
2121 * Parse a single attribute definition.
2123 private void readAttDef(String elementName)
2124 throws IOException, XMLStreamException
2126 String name = readNmtoken(true);
2127 requireWhitespace();
2128 StringBuffer acc = new StringBuffer();
2129 HashSet values = new HashSet();
2130 String type = readAttType(acc, values);
2131 if (validating)
2133 if ("ID".equals(type))
2135 // VC: One ID per Element Type
2136 for (Iterator i = doctype.attlistIterator(elementName);
2137 i.hasNext(); )
2139 Map.Entry entry = (Map.Entry) i.next();
2140 AttributeDecl decl = (AttributeDecl) entry.getValue();
2141 if ("ID".equals(decl.type))
2142 error("element types must not have more than one ID " +
2143 "attribute");
2146 else if ("NOTATION".equals(type))
2148 // VC: One Notation Per Element Type
2149 for (Iterator i = doctype.attlistIterator(elementName);
2150 i.hasNext(); )
2152 Map.Entry entry = (Map.Entry) i.next();
2153 AttributeDecl decl = (AttributeDecl) entry.getValue();
2154 if ("NOTATION".equals(decl.type))
2155 error("element types must not have more than one NOTATION " +
2156 "attribute");
2158 // VC: No Notation on Empty Element
2159 ContentModel model = doctype.getElementModel(elementName);
2160 if (model != null && model.type == ContentModel.EMPTY)
2161 error("attributes of type NOTATION must not be declared on an " +
2162 "element declared EMPTY");
2165 String enumer = null;
2166 if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2167 enumer = acc.toString();
2168 else
2169 values = null;
2170 requireWhitespace();
2171 readDefault(elementName, name, type, enumer, values);
2175 * Parse an attribute type.
2177 private String readAttType(StringBuffer acc, HashSet values)
2178 throws IOException, XMLStreamException
2180 if (tryRead('('))
2182 readEnumeration(false, acc, values);
2183 return "ENUMERATION";
2185 else
2187 String typeString = readNmtoken(true);
2188 if ("NOTATION".equals(typeString))
2190 readNotationType(acc, values);
2191 return typeString;
2193 else if ("CDATA".equals(typeString) ||
2194 "ID".equals(typeString) ||
2195 "IDREF".equals(typeString) ||
2196 "IDREFS".equals(typeString) ||
2197 "ENTITY".equals(typeString) ||
2198 "ENTITIES".equals(typeString) ||
2199 "NMTOKEN".equals(typeString) ||
2200 "NMTOKENS".equals(typeString))
2201 return typeString;
2202 else
2204 error("illegal attribute type", typeString);
2205 return null;
2211 * Parse an enumeration.
2213 private void readEnumeration(boolean isNames, StringBuffer acc,
2214 HashSet values)
2215 throws IOException, XMLStreamException
2217 acc.append('(');
2218 // first token
2219 skipWhitespace();
2220 String token = readNmtoken(isNames);
2221 acc.append(token);
2222 values.add(token);
2223 // subsequent tokens
2224 skipWhitespace();
2225 while (!tryRead(')'))
2227 require('|');
2228 acc.append('|');
2229 skipWhitespace();
2230 token = readNmtoken(isNames);
2231 // VC: No Duplicate Tokens
2232 if (validating && values.contains(token))
2233 error("duplicate token", token);
2234 acc.append(token);
2235 values.add(token);
2236 skipWhitespace();
2238 acc.append(')');
2242 * Parse a notation type for an attribute.
2244 private void readNotationType(StringBuffer acc, HashSet values)
2245 throws IOException, XMLStreamException
2247 requireWhitespace();
2248 require('(');
2249 readEnumeration(true, acc, values);
2253 * Parse the default value for an attribute.
2255 private void readDefault(String elementName, String name,
2256 String type, String enumeration, HashSet values)
2257 throws IOException, XMLStreamException
2259 int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2260 int flags = LIT_ATTRIBUTE;
2261 String value = null, defaultType = null;
2262 boolean saved = expandPE;
2264 if (!"CDATA".equals(type))
2265 flags |= LIT_NORMALIZE;
2267 expandPE = false;
2268 if (tryRead('#'))
2270 if (tryRead("FIXED"))
2272 defaultType = "#FIXED";
2273 valueType = ATTRIBUTE_DEFAULT_FIXED;
2274 requireWhitespace();
2275 value = readLiteral(flags, false);
2277 else if (tryRead("REQUIRED"))
2279 defaultType = "#REQUIRED";
2280 valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2282 else if (tryRead("IMPLIED"))
2284 defaultType = "#IMPLIED";
2285 valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2287 else
2288 error("illegal keyword for attribute default value");
2290 else
2291 value = readLiteral(flags, false);
2292 expandPE = saved;
2293 if (validating)
2295 if ("ID".equals(type))
2297 // VC: Attribute Default Value Syntactically Correct
2298 if (value != null && !isNmtoken(value, true))
2299 error("default value must match Name production", value);
2300 // VC: ID Attribute Default
2301 if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2302 valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2303 error("ID attributes must have a declared default of " +
2304 "#IMPLIED or #REQUIRED");
2306 else if (value != null)
2308 // VC: Attribute Default Value Syntactically Correct
2309 if ("IDREF".equals(type) || "ENTITY".equals(type))
2311 if (!isNmtoken(value, true))
2312 error("default value must match Name production", value);
2314 else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2316 StringTokenizer st = new StringTokenizer(value);
2317 while (st.hasMoreTokens())
2319 String token = st.nextToken();
2320 if (!isNmtoken(token, true))
2321 error("default value must match Name production", token);
2324 else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2326 if (!isNmtoken(value, false))
2327 error("default value must match Nmtoken production", value);
2329 else if ("NMTOKENS".equals(type))
2331 StringTokenizer st = new StringTokenizer(value);
2332 while (st.hasMoreTokens())
2334 String token = st.nextToken();
2335 if (!isNmtoken(token, false))
2336 error("default value must match Nmtoken production",
2337 token);
2342 // Register attribute def
2343 AttributeDecl attribute =
2344 new AttributeDecl(type, value, valueType, enumeration, values,
2345 inputStack.size() != 1);
2346 doctype.addAttributeDecl(elementName, name, attribute);
2350 * Parse the EntityDecl production.
2352 private void readEntityDecl(boolean inExternalSubset)
2353 throws IOException, XMLStreamException
2355 int flags = 0;
2356 // Check if parameter entity
2357 boolean peFlag = false;
2358 expandPE = false;
2359 requireWhitespace();
2360 if (tryRead('%'))
2362 peFlag = true;
2363 requireWhitespace();
2365 expandPE = true;
2366 // Read entity name
2367 String name = readNmtoken(true);
2368 if (name.indexOf(':') != -1)
2369 error("illegal character ':' in entity name", name);
2370 if (peFlag)
2371 name = "%" + name;
2372 requireWhitespace();
2373 mark(1);
2374 int c = readCh();
2375 reset();
2376 if (c == 0x22 || c == 0x27) // " | '
2378 // Internal entity replacement text
2379 String value = readLiteral(flags | LIT_DISABLE_EREF, true);
2380 int ai = value.indexOf('&');
2381 while (ai != -1)
2383 int sci = value.indexOf(';', ai);
2384 if (sci == -1)
2385 error("malformed reference in entity value", value);
2386 String ref = value.substring(ai + 1, sci);
2387 int[] cp = UnicodeReader.toCodePointArray(ref);
2388 if (cp.length == 0)
2389 error("malformed reference in entity value", value);
2390 if (cp[0] == 0x23) // #
2392 if (cp.length == 1)
2393 error("malformed reference in entity value", value);
2394 if (cp[1] == 0x78) // 'x'
2396 if (cp.length == 2)
2397 error("malformed reference in entity value", value);
2398 for (int i = 2; i < cp.length; i++)
2400 int x = cp[i];
2401 if (x < 0x30 ||
2402 (x > 0x39 && x < 0x41) ||
2403 (x > 0x46 && x < 0x61) ||
2404 x > 0x66)
2405 error("malformed character reference in entity value",
2406 value);
2409 else
2411 for (int i = 1; i < cp.length; i++)
2413 int x = cp[i];
2414 if (x < 0x30 || x > 0x39)
2415 error("malformed character reference in entity value",
2416 value);
2420 else
2422 if (!isNameStartCharacter(cp[0]))
2423 error("malformed reference in entity value", value);
2424 for (int i = 1; i < cp.length; i++)
2426 if (!isNameCharacter(cp[i]))
2427 error("malformed reference in entity value", value);
2430 ai = value.indexOf('&', sci);
2432 doctype.addEntityDecl(name, value, inExternalSubset);
2434 else
2436 ExternalIds ids = readExternalIds(false, false);
2437 // Check for NDATA
2438 boolean white = tryWhitespace();
2439 if (!peFlag && tryRead("NDATA"))
2441 if (!white)
2442 error("whitespace required before NDATA");
2443 requireWhitespace();
2444 ids.notationName = readNmtoken(true);
2446 doctype.addEntityDecl(name, ids, inExternalSubset);
2448 // finish
2449 skipWhitespace();
2450 require('>');
2454 * Parse the NotationDecl production.
2456 private void readNotationDecl(boolean inExternalSubset)
2457 throws IOException, XMLStreamException
2459 requireWhitespace();
2460 String notationName = readNmtoken(true);
2461 if (notationName.indexOf(':') != -1)
2462 error("illegal character ':' in notation name", notationName);
2463 if (validating)
2465 // VC: Unique Notation Name
2466 ExternalIds notation = doctype.getNotation(notationName);
2467 if (notation != null)
2468 error("duplicate notation name", notationName);
2470 requireWhitespace();
2471 ExternalIds ids = readExternalIds(true, false);
2472 ids.notationName = notationName;
2473 doctype.addNotationDecl(notationName, ids, inExternalSubset);
2474 skipWhitespace();
2475 require('>');
2479 * Returns a tuple {publicId, systemId}.
2481 private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2482 throws IOException, XMLStreamException
2484 int c;
2485 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2486 ExternalIds ids = new ExternalIds();
2488 if (tryRead("PUBLIC"))
2490 requireWhitespace();
2491 ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2492 if (inNotation)
2494 skipWhitespace();
2495 mark(1);
2496 c = readCh();
2497 reset();
2498 if (c == 0x22 || c == 0x27) // " | '
2500 String href = readLiteral(flags, false);
2501 ids.systemId = absolutize(input.systemId, href);
2504 else
2506 requireWhitespace();
2507 String href = readLiteral(flags, false);
2508 ids.systemId = absolutize(input.systemId, href);
2510 // Check valid URI characters
2511 for (int i = 0; i < ids.publicId.length(); i++)
2513 char d = ids.publicId.charAt(i);
2514 if (d >= 'a' && d <= 'z')
2515 continue;
2516 if (d >= 'A' && d <= 'Z')
2517 continue;
2518 if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2519 continue;
2520 error("illegal PUBLIC id character",
2521 "U+" + Integer.toHexString(d));
2524 else if (tryRead("SYSTEM"))
2526 requireWhitespace();
2527 String href = readLiteral(flags, false);
2528 ids.systemId = absolutize(input.systemId, href);
2530 else if (!isSubset)
2532 error("missing SYSTEM or PUBLIC keyword");
2534 if (ids.systemId != null && !inNotation)
2536 if (ids.systemId.indexOf('#') != -1)
2537 error("SYSTEM id has a URI fragment", ids.systemId);
2539 return ids;
2543 * Parse the start of an element.
 * Reads the element name and its attributes, adds attributes defaulted by
 * the doctype, records the xml:base when base-aware, checks namespace
 * prefix bindings, optionally validates, then pushes the element name onto
 * the element stack and leaves it in <code>buf</code>.
2544 * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
 * @throws IOException declared; propagated from the read helpers
 * @throws XMLStreamException declared; propagated from error()
2546 private int readStartElement()
2547 throws IOException, XMLStreamException
2549 // Read element name
2550 String elementName = readNmtoken(true);
2551 attrs.clear();
2552 // Push namespace context
2553 if (namespaceAware)
2555 if (elementName.charAt(0) == ':' ||
2556 elementName.charAt(elementName.length() - 1) == ':')
2557 error("not a QName", elementName);
// a fresh map for this element goes to the head of the namespace list;
// addNamespace() later stores prefix -> URI pairs in it
2558 namespaces.addFirst(new LinkedHashMap());
2560 // Read element content
2561 boolean white = tryWhitespace();
// mark/reset appear to implement a one-character lookahead here
2562 mark(1);
2563 int c = readCh();
2564 while (c != 0x2f && c != 0x3e) // '/' | '>'
2566 // Read attribute
2567 reset();
2568 if (!white)
2569 error("need whitespace between attributes");
2570 readAttribute(elementName);
2571 white = tryWhitespace();
2572 mark(1);
2573 c = readCh();
2575 // supply defaulted attributes
2576 if (doctype != null)
2578 for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2580 Map.Entry entry = (Map.Entry) i.next();
2581 String attName = (String) entry.getKey();
2582 AttributeDecl decl = (AttributeDecl) entry.getValue();
2583 if (validating)
2585 switch (decl.valueType)
2587 case ATTRIBUTE_DEFAULT_REQUIRED:
2588 // VC: Required Attribute
2589 if (decl.value == null && !attributeSpecified(attName))
2590 error("value for " + attName + " attribute is required");
2591 break;
2592 case ATTRIBUTE_DEFAULT_FIXED:
2593 // VC: Fixed Attribute Default
2594 for (Iterator j = attrs.iterator(); j.hasNext(); )
2596 Attribute a = (Attribute) j.next();
2597 if (attName.equals(a.name) &&
2598 !decl.value.equals(a.value))
2599 error("value for " + attName + " attribute must be " +
2600 decl.value);
2602 break;
// namespace declarations are looked up in the namespace context rather
// than in attrs; anything already present is not defaulted
2605 if (namespaceAware && attName.equals("xmlns"))
2607 LinkedHashMap ctx =
2608 (LinkedHashMap) namespaces.getFirst();
2609 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2610 continue; // namespace was specified
2612 else if (namespaceAware && attName.startsWith("xmlns:"))
2614 LinkedHashMap ctx =
2615 (LinkedHashMap) namespaces.getFirst();
2616 if (ctx.containsKey(attName.substring(6)))
2617 continue; // namespace was specified
2619 else if (attributeSpecified(attName))
2620 continue;
2621 if (decl.value == null)
2622 continue;
2623 // VC: Standalone Document Declaration
// NOTE(review): == compares Boolean references; this relies on
// xmlStandalone always holding the canonical Boolean.TRUE - confirm
2624 if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2625 error("standalone must be 'no' if attributes inherit values " +
2626 "from externally declared markup declarations");
2627 Attribute attr =
2628 new Attribute(attName, decl.type, false, decl.value);
2629 if (namespaceAware)
2631 if (!addNamespace(attr))
2632 attrs.add(attr);
2634 else
2635 attrs.add(attr);
2638 if (baseAware)
2640 String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2641 String base = getXMLBase();
2642 bases.addFirst(absolutize(base, uri));
2644 if (namespaceAware)
2646 // check prefix bindings
2647 int ci = elementName.indexOf(':');
2648 if (ci != -1)
2650 String prefix = elementName.substring(0, ci);
2651 String uri = getNamespaceURI(prefix);
2652 if (uri == null)
2653 error("unbound element prefix", prefix);
2654 else if (input.xml11 && "".equals(uri))
2655 error("XML 1.1 unbound element prefix", prefix);
2657 for (Iterator i = attrs.iterator(); i.hasNext(); )
2659 Attribute attr = (Attribute) i.next();
2660 if (attr.prefix != null &&
2661 !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2663 String uri = getNamespaceURI(attr.prefix);
2664 if (uri == null)
2665 error("unbound attribute prefix", attr.prefix);
2666 else if (input.xml11 && "".equals(uri))
2667 error("XML 1.1 unbound attribute prefix", attr.prefix);
2671 if (validating && doctype != null)
2673 validateStartElement(elementName);
2674 currentContentModel = doctype.getElementModel(elementName);
2675 if (currentContentModel == null)
2676 error("no element declaration", elementName);
2677 validationStack.add(new LinkedList());
2679 // make element name available for read
2680 buf.setLength(0);
2681 buf.append(elementName);
2682 // push element onto stack
2683 stack.addLast(elementName);
// c is the character that ended the attribute loop above
2684 switch (c)
2686 case 0x3e: // '>'
2687 return CONTENT;
2688 case 0x2f: // '/'
2689 require('>');
2690 return EMPTY_ELEMENT;
// not reached: the loop above only exits when c is '/' or '>'
2692 return -1; // to satisfy compiler
2696 * Indicates whether the specified attribute name was specified for the
2697 * current element.
2699 private boolean attributeSpecified(String attName)
2701 for (Iterator j = attrs.iterator(); j.hasNext(); )
2703 Attribute a = (Attribute) j.next();
2704 if (attName.equals(a.name))
2705 return true;
2707 return false;
2711 * Parse an attribute.
 * Reads the attribute name, the equals sign and the literal value, checks
 * for duplicates, applies the attribute validity constraints when
 * validating, and finally records the attribute either as a namespace
 * declaration or in <code>attrs</code>.
 * @param elementName the name of the element the attribute belongs to
 * @throws IOException declared; propagated from the read helpers
 * @throws XMLStreamException declared; propagated from error()
2713 private void readAttribute(String elementName)
2714 throws IOException, XMLStreamException
2716 // Read attribute name
2717 String attributeName = readNmtoken(true);
2718 String type = getAttributeType(elementName, attributeName);
2719 readEq();
2720 // Read literal
2721 final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
// only attributes with a declared non-CDATA type get LIT_NORMALIZE
2722 String value = (type == null || "CDATA".equals(type)) ?
2723 readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2724 // add attribute event
2725 Attribute attr = this.new Attribute(attributeName, type, true, value);
2726 if (namespaceAware)
2728 if (attributeName.charAt(0) == ':' ||
2729 attributeName.charAt(attributeName.length() - 1) == ':')
2730 error("not a QName", attributeName);
2731 else if (attributeName.equals("xmlns"))
2733 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2734 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2735 error("duplicate default namespace");
2737 else if (attributeName.startsWith("xmlns:"))
2739 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2740 if (ctx.containsKey(attributeName.substring(6)))
2741 error("duplicate namespace", attributeName.substring(6));
2743 else if (attrs.contains(attr))
2744 error("duplicate attribute", attributeName);
// same duplicate check for the non-namespace-aware case
2746 else if (attrs.contains(attr))
2747 error("duplicate attribute", attributeName);
2748 if (validating && doctype != null)
2750 // VC: Attribute Value Type
2751 AttributeDecl decl =
2752 doctype.getAttributeDecl(elementName, attributeName);
// NOTE(review): decl is dereferenced right after this check, so the code
// relies on error() not returning normally - confirm
2753 if (decl == null)
2754 error("attribute must be declared", attributeName);
2755 if ("ENUMERATION".equals(decl.type))
2757 // VC: Enumeration
2758 if (!decl.values.contains(value))
2759 error("value does not match enumeration " + decl.enumeration,
2760 value);
2762 else if ("ID".equals(decl.type))
2764 // VC: ID
2765 if (!isNmtoken(value, true))
2766 error("ID values must match the Name production");
2767 if (ids.contains(value))
2768 error("Duplicate ID", value);
2769 ids.add(value);
2771 else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2773 StringTokenizer st = new StringTokenizer(value);
2774 while (st.hasMoreTokens())
2776 String token = st.nextToken();
2777 // VC: IDREF
2778 if (!isNmtoken(token, true))
2779 error("IDREF values must match the Name production");
// collected here; not resolved against ids within this method
2780 idrefs.add(token);
2783 else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2785 StringTokenizer st = new StringTokenizer(value);
2786 while (st.hasMoreTokens())
2788 String token = st.nextToken();
2789 // VC: Name Token
2790 if (!isNmtoken(token, false))
2791 error("NMTOKEN values must match the Nmtoken production");
2794 else if ("ENTITY".equals(decl.type))
2796 // VC: Entity Name
2797 if (!isNmtoken(value, true))
2798 error("ENTITY values must match the Name production");
2799 Object entity = doctype.getEntity(value);
// the entity must be an ExternalIds with a notation name
2800 if (entity == null || !(entity instanceof ExternalIds) ||
2801 ((ExternalIds) entity).notationName == null)
2802 error("ENTITY values must match the name of an unparsed " +
2803 "entity declared in the DTD");
2805 else if ("NOTATION".equals(decl.type))
2807 if (!decl.values.contains(value))
2808 error("NOTATION values must match a declared notation name",
2809 value);
2810 // VC: Notation Attributes
2811 ExternalIds notation = doctype.getNotation(value);
2812 if (notation == null)
2813 error("NOTATION values must match the name of a notation " +
2814 "declared in the DTD", value);
// addNamespace() returns false for ordinary attributes, which are then
// stored in attrs
2817 if (namespaceAware)
2819 if (!addNamespace(attr))
2820 attrs.add(attr);
2822 else
2823 attrs.add(attr);
2827 * Determines whether the specified attribute is a namespace declaration,
2828 * and adds it to the current namespace context if so. Returns false if
2829 * the attribute is an ordinary attribute.
2831 private boolean addNamespace(Attribute attr)
2832 throws XMLStreamException
2834 if ("xmlns".equals(attr.name))
2836 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2837 if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2838 error("Duplicate default namespace declaration");
2839 if (XMLConstants.XML_NS_URI.equals(attr.value))
2840 error("can't bind XML namespace");
2841 if ("".equals(attr.value) && !input.xml11)
2842 error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2843 ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2844 return true;
2846 else if ("xmlns".equals(attr.prefix))
2848 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2849 if (ctx.get(attr.localName) != null)
2850 error("Duplicate namespace declaration for prefix",
2851 attr.localName);
2852 if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2854 if (!XMLConstants.XML_NS_URI.equals(attr.value))
2855 error("can't redeclare xml prefix");
2856 else
2857 return false; // treat as attribute
2859 if (XMLConstants.XML_NS_URI.equals(attr.value))
2860 error("can't bind non-xml prefix to XML namespace");
2861 if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2862 error("can't redeclare xmlns prefix");
2863 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2864 error("can't bind non-xmlns prefix to XML Namespace namespace");
2865 if ("".equals(attr.value) && !input.xml11)
2866 error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2867 ctx.put(attr.localName, attr.value);
2868 return true;
2870 return false;
2874 * Parse a closing tag.
2876 private void readEndElement()
2877 throws IOException, XMLStreamException
2879 // pop element off stack
2880 String expected = (String) stack.removeLast();
2881 require(expected);
2882 skipWhitespace();
2883 require('>');
2884 // Make element name available
2885 buf.setLength(0);
2886 buf.append(expected);
2887 if (validating && doctype != null)
2888 endElementValidationHook();
2892 * Validate the end of an element.
2893 * Called on an end-element or empty element if validating.
2895 private void endElementValidationHook()
2896 throws XMLStreamException
2898 validateEndElement();
2899 validationStack.removeLast();
2900 if (stack.isEmpty())
2901 currentContentModel = null;
2902 else
2904 String parent = (String) stack.getLast();
2905 currentContentModel = doctype.getElementModel(parent);
2910 * Parse a comment.
2912 private void readComment(boolean inDTD)
2913 throws IOException, XMLStreamException
2915 boolean saved = expandPE;
2916 expandPE = false;
2917 buf.setLength(0);
2918 readUntil(TEST_END_COMMENT);
2919 require('>');
2920 expandPE = saved;
2921 if (inDTD)
2922 doctype.addComment(buf.toString());
2926 * Parse a processing instruction.
2928 private void readPI(boolean inDTD)
2929 throws IOException, XMLStreamException
2931 boolean saved = expandPE;
2932 expandPE = false;
2933 piTarget = readNmtoken(true);
2934 if (piTarget.indexOf(':') != -1)
2935 error("illegal character in PI target", new Character(':'));
2936 if ("xml".equalsIgnoreCase(piTarget))
2937 error("illegal PI target", piTarget);
2938 if (tryRead(TEST_END_PI))
2939 piData = null;
2940 else
2942 if (!tryWhitespace())
2943 error("whitespace required between PI target and data");
2944 buf.setLength(0);
2945 readUntil(TEST_END_PI);
2946 piData = buf.toString();
2948 expandPE = saved;
2949 if (inDTD)
2950 doctype.addPI(piTarget, piData);
2954 * Parse an entity reference.
2956 private void readReference()
2957 throws IOException, XMLStreamException
2959 buf.setLength(0);
2960 String entityName = readNmtoken(true);
2961 require(';');
2962 buf.setLength(0);
2963 buf.append(entityName);
2967 * Read an CDATA section.
2969 private void readCDSect()
2970 throws IOException, XMLStreamException
2972 buf.setLength(0);
2973 readUntil(TEST_END_CDATA);
2977 * Read character data.
 * Text is accumulated in <code>buf</code>. Input is read in blocks into
 * <code>tmpBuf</code>; when markup is found inside a block the stream is
 * rewound and only the characters before the markup are consumed again.
 * @param prefix text placed in the buffer before the data read, or null
2978 * @return the type of text read (CHARACTERS or SPACE)
 * @throws IOException declared; EOFException is thrown when the outermost
 * input ends
 * @throws XMLStreamException declared; propagated from error()
2980 private int readCharData(String prefix)
2981 throws IOException, XMLStreamException
// white stays true while only whitespace has been seen
2983 boolean white = true;
2984 buf.setLength(0);
2985 if (prefix != null)
2986 buf.append(prefix);
2987 boolean done = false;
2988 boolean entities = false;
2989 while (!done)
2991 // Block read
2992 mark(tmpBuf.length);
2993 int len = read(tmpBuf, 0, tmpBuf.length);
2994 if (len == -1)
2996 if (inputStack.size() > 1)
2998 popInput();
2999 // report end-entity
3000 done = true;
3002 else
3003 throw new EOFException();
3005 for (int i = 0; i < len && !done; i++)
3007 int c = tmpBuf[i];
3008 switch (c)
3010 case 0x20:
3011 case 0x09:
3012 case 0x0a:
3013 case 0x0d:
3014 buf.append(Character.toChars(c));
3015 break; // whitespace
3016 case 0x26: // '&'
// rewind to the start of the block and re-consume only the i characters
// that precede the '&'
3017 reset();
3018 read(tmpBuf, 0, i);
3019 // character reference?
3020 mark(3);
3021 c = readCh(); // &
3022 c = readCh();
3023 if (c == 0x23) // '#'
3025 mark(1);
3026 c = readCh();
3027 boolean hex = (c == 0x78); // 'x'
3028 if (!hex)
3029 reset();
3030 char[] ch = readCharacterRef(hex ? 16 : 10);
3031 buf.append(ch, 0, ch.length);
// a referenced non-whitespace character makes the text CHARACTERS
3032 for (int j = 0; j < ch.length; j++)
3034 switch (ch[j])
3036 case 0x20:
3037 case 0x09:
3038 case 0x0a:
3039 case 0x0d:
3040 break; // whitespace
3041 default:
3042 white = false;
3046 else
3048 // entity reference
3049 reset();
3050 c = readCh(); // &
3051 String entityName = readNmtoken(true);
3052 require(';');
3053 String text =
3054 (String) PREDEFINED_ENTITIES.get(entityName);
3055 if (text != null)
3056 buf.append(text);
3057 else
// not predefined: the reference text itself is pushed back as a new input
// and this text run ends; the break appears to leave the enclosing switch,
// skipping the re-read below
3059 pushInput("", "&" + entityName + ";", false, false);
3060 done = true;
3061 break;
3064 // continue processing
// i = -1 restarts the scan at index 0 of the block read next (i++ follows)
3065 i = -1;
3066 mark(tmpBuf.length);
3067 len = read(tmpBuf, 0, tmpBuf.length);
3068 if (len == -1)
3070 if (inputStack.size() > 1)
3072 popInput();
3073 done = true;
3075 else
3076 throw new EOFException();
// remembered so that CR/LF pairs are normalized before returning
3078 entities = true;
3079 break; // end of text sequence
3080 case 0x3e: // '>'
3081 int l = buf.length();
3082 if (l > 1 &&
3083 buf.charAt(l - 1) == ']' &&
3084 buf.charAt(l - 2) == ']')
3085 error("Character data may not contain unescaped ']]>'");
3086 buf.append(Character.toChars(c));
3087 break;
3088 case 0x3c: // '<'
// rewind and re-consume only the text before the '<'; i = len ends the
// scan of this block
3089 reset();
3090 read(tmpBuf, 0, i);
3091 i = len;
3092 if (coalescing && tryRead(TEST_CDATA))
3093 readUntil(TEST_END_CDATA); // read CDATA section into buf
3094 else
3095 done = true; // end of text sequence
3096 break;
3097 default:
3098 if (input.xml11)
3100 if (!isXML11Char(c) || isXML11RestrictedChar(c))
3101 error("illegal XML 1.1 character",
3102 "U+" + Integer.toHexString(c));
3104 else if (!isChar(c))
3105 error("illegal XML character",
3106 "U+" + Integer.toHexString(c));
3107 white = false;
3108 buf.append(Character.toChars(c));
3111 // if text buffer >= 2MB, return it as a chunk
3112 // to avoid excessive memory use
3113 if (buf.length() >= 2097152)
3114 done = true;
3116 if (entities)
3117 normalizeCRLF(buf);
3118 return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3122 * Expands the specified entity.
3124 private void expandEntity(String name, boolean inAttr, boolean normalize)
3125 throws IOException, XMLStreamException
3127 if (doctype != null)
3129 Object value = doctype.getEntity(name);
3130 if (value != null)
3132 if (xmlStandalone == Boolean.TRUE)
3134 // VC: Standalone Document Declaration
3135 if (doctype.isEntityExternal(name))
3136 error("reference to external entity in standalone document");
3137 else if (value instanceof ExternalIds)
3139 ExternalIds ids = (ExternalIds) value;
3140 if (ids.notationName != null &&
3141 doctype.isNotationExternal(ids.notationName))
3142 error("reference to external notation in " +
3143 "standalone document");
3146 if (value instanceof String)
3148 String text = (String) value;
3149 if (inAttr && text.indexOf('<') != -1)
3150 error("< in attribute value");
3151 pushInput(name, text, !inAttr, normalize);
3153 else if (inAttr)
3154 error("reference to external entity in attribute value", name);
3155 else
3156 pushInput(name, (ExternalIds) value, !inAttr, normalize);
3157 return;
3160 error("reference to undeclared entity", name);
3164 * Indicates whether the specified entity is unparsed.
3166 private boolean isUnparsedEntity(String name)
3168 if (doctype != null)
3170 Object value = doctype.getEntity(name);
3171 if (value != null && value instanceof ExternalIds)
3172 return ((ExternalIds) value).notationName != null;
3174 return false;
3178 * Read an equals sign.
3180 private void readEq()
3181 throws IOException, XMLStreamException
3183 skipWhitespace();
3184 require('=');
3185 skipWhitespace();
3189 * Character read for reading literals.
3190 * @param recognizePEs whether to recognize parameter-entity references
3192 private int literalReadCh(boolean recognizePEs)
3193 throws IOException, XMLStreamException
3195 int c = recognizePEs ? readCh() : read();
3196 while (c == -1)
3198 if (inputStack.size() > 1)
3200 inputStack.removeLast();
3201 input = (Input) inputStack.getLast();
3202 // Don't issue end-entity
3203 c = recognizePEs ? readCh() : read();
3205 else
3206 throw new EOFException();
3208 return c;
3212 * Read a string literal.
 * The literal is delimited by a single or double quote. Characters are
 * accumulated in <code>literalBuf</code>; depending on the flags,
 * whitespace is normalized and character and entity references are
 * expanded.
 * @param flags a combination of the LIT_* flags tested below
 * @param recognizePEs whether to recognize parameter-entity references
 * @return the literal value, without its delimiters
 * @throws IOException declared; propagated from the read helpers
 * @throws XMLStreamException declared; propagated from error()
3214 private String readLiteral(int flags, boolean recognizePEs)
3215 throws IOException, XMLStreamException
3217 boolean saved = expandPE;
3218 int delim = readCh();
3219 if (delim != 0x27 && delim != 0x22)
3220 error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
3221 literalBuf.setLength(0);
3222 if ((flags & LIT_DISABLE_PE) != 0)
3223 expandPE = false;
3224 boolean entities = false;
// remembered so that a delimiter character only ends the literal when it
// is read at the same input depth the literal started at
3225 int inputStackSize = inputStack.size();
// (the loop below runs until that closing delimiter is read)
3228 int c = literalReadCh(recognizePEs);
3229 if (c == delim && inputStackSize == inputStack.size())
3230 break;
3231 switch (c)
3233 case 0x0a:
3234 case 0x0d:
3235 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
3236 c = 0x20; // normalize to space
3237 break;
3238 case 0x09:
3239 if ((flags & LIT_ATTRIBUTE) != 0)
3240 c = 0x20; // normalize to space
3241 break;
3242 case 0x26: // '&'
3243 mark(2);
3244 c = readCh();
3245 if (c == 0x23) // '#'
3247 if ((flags & LIT_DISABLE_CREF) != 0)
// character references disabled: un-read and keep the '&' literally
3249 reset();
3250 c = 0x26; // '&'
3252 else
3254 mark(1);
3255 c = readCh();
3256 boolean hex = (c == 0x78); // 'x'
3257 if (!hex)
3258 reset();
3259 char[] ref = readCharacterRef(hex ? 16 : 10);
3260 for (int i = 0; i < ref.length; i++)
3262 char x = ref[i];
3263 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0 &&
3264 (x == 0x0a || x == 0x0d))
3265 x = 0x20; // normalize
3266 else if ((flags & LIT_ATTRIBUTE) != 0 && x == 0x09)
3267 x = 0x20; // normalize
3268 literalBuf.append(x);
3270 entities = true;
3271 continue;
3274 else
3276 if ((flags & LIT_DISABLE_EREF) != 0)
// entity references disabled: un-read and keep the '&' literally
3278 reset();
3279 c = 0x26; // '&'
3281 else
3283 reset();
3284 String entityName = readNmtoken(true);
3285 require(';');
3286 String text =
3287 (String) PREDEFINED_ENTITIES.get(entityName);
3288 if (text != null)
3289 literalBuf.append(text);
3290 else
3291 expandEntity(entityName,
3292 (flags & LIT_ATTRIBUTE) != 0,
3293 true);
3294 entities = true;
3295 continue;
3298 break;
3299 case 0x3c: // '<'
3300 if ((flags & LIT_ATTRIBUTE) != 0)
3301 error("attribute values may not contain '<'");
3302 break;
// NOTE(review): literalReadCh() as shown in this file never returns -1, so
// this case looks unreachable - confirm
3303 case -1:
3304 if (inputStack.size() > 1)
3306 popInput();
3307 continue;
3309 throw new EOFException();
3310 default:
// NOTE(review): c > 0xfffd also rejects code points at and above U+10000,
// which the XML Char production allows; readCharData() uses isChar() and
// isXML11Char() instead - confirm whether readCh() can return such values
3311 if ((c < 0x0020 || c > 0xfffd) ||
3312 (c >= 0xd800 && c < 0xdc00) ||
3313 (input.xml11 && (c >= 0x007f) &&
3314 (c <= 0x009f) && (c != 0x0085)))
3315 error("illegal character", "U+" + Integer.toHexString(c));
3317 literalBuf.append(Character.toChars(c));
3319 while (true);
3320 expandPE = saved;
3321 if (entities)
3322 normalizeCRLF(literalBuf);
// NOTE(review): "> 0" differs from "!= 0" only if LIT_NORMALIZE is the
// sign bit - confirm against the flag definition
3323 if ((flags & LIT_NORMALIZE) > 0)
3324 literalBuf = normalize(literalBuf);
3325 return literalBuf.toString();
3329 * Performs attribute-value normalization of the text buffer.
3330 * This discards leading and trailing whitespace, and replaces sequences
3331 * of whitespace with a single space.
3333 private StringBuffer normalize(StringBuffer buf)
3335 StringBuffer acc = new StringBuffer();
3336 int len = buf.length();
3337 int avState = 0;
3338 for (int i = 0; i < len; i++)
3340 char c = buf.charAt(i);
3341 if (c == ' ')
3342 avState = (avState == 0) ? 0 : 1;
3343 else
3345 if (avState == 1)
3346 acc.append(' ');
3347 acc.append(c);
3348 avState = 2;
3351 return acc;
3355 * Replace any CR/LF pairs in the buffer with LF.
3356 * This may be necessary if combinations of CR or LF were declared as
3357 * (character) entity references in the input.
3359 private void normalizeCRLF(StringBuffer buf)
3361 int len = buf.length() - 1;
3362 for (int i = 0; i < len; i++)
3364 char c = buf.charAt(i);
3365 if (c == '\r' && buf.charAt(i + 1) == '\n')
3367 buf.deleteCharAt(i--);
3368 len--;
3374 * Parse and expand a parameter entity reference.
3376 private void expandPEReference()
3377 throws IOException, XMLStreamException
3379 String name = readNmtoken(true, new StringBuffer());
3380 require(';');
3381 mark(1); // ensure we don't reset to before the semicolon
3382 if (doctype != null)
3384 String entityName = "%" + name;
3385 Object entity = doctype.getEntity(entityName);
3386 if (entity != null)
3388 if (xmlStandalone == Boolean.TRUE)
3390 if (doctype.isEntityExternal(entityName))
3391 error("reference to external parameter entity in " +
3392 "standalone document");
3394 if (entity instanceof String)
3396 pushInput(name, (String) entity, false, input.normalize);
3397 //pushInput(name, " " + (String) entity + " ");
3399 else
3401 //pushInput("", " ");
3402 pushInput(name, (ExternalIds) entity, false, input.normalize);
3403 //pushInput("", " ");
3406 else
3407 error("reference to undeclared parameter entity", name);
3409 else
3410 error("reference to parameter entity without doctype", name);
3414 * Parse the digits in a character reference.
3415 * @param base the base of the digits (10 or 16)
3417 private char[] readCharacterRef(int base)
3418 throws IOException, XMLStreamException
3420 StringBuffer b = new StringBuffer();
3421 for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3422 b.append(Character.toChars(c));
3425 int ord = Integer.parseInt(b.toString(), base);
3426 if (input.xml11)
3428 if (!isXML11Char(ord))
3429 error("illegal XML 1.1 character reference " +
3430 "U+" + Integer.toHexString(ord));
3432 else
3434 if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3435 || (ord >= 0xd800 && ord <= 0xdfff)
3436 || ord == 0xfffe || ord == 0xffff
3437 || ord > 0x0010ffff)
3438 error("illegal XML character reference " +
3439 "U+" + Integer.toHexString(ord));
3441 return Character.toChars(ord);
3443 catch (NumberFormatException e)
3445 error("illegal characters in character reference", b.toString());
3446 return null;
3451 * Parses an NMTOKEN or Name production.
3452 * @param isName if a Name, otherwise an NMTOKEN
3454 private String readNmtoken(boolean isName)
3455 throws IOException, XMLStreamException
3457 return readNmtoken(isName, nmtokenBuf);
3461 * Parses an NMTOKEN or Name production using the specified buffer.
3462 * @param isName if a Name, otherwise an NMTOKEN
3463 * @param buf the character buffer to use
3465 private String readNmtoken(boolean isName, StringBuffer buf)
3466 throws IOException, XMLStreamException
3468 buf.setLength(0);
3469 int c = readCh();
3470 if (isName)
3472 if (!isNameStartCharacter(c))
3473 error("not a name start character",
3474 "U+" + Integer.toHexString(c));
3476 else
3478 if (!isNameCharacter(c))
3479 error("not a name character",
3480 "U+" + Integer.toHexString(c));
3482 buf.append(Character.toChars(c));
3485 mark(1);
3486 c = readCh();
3487 switch (c)
3489 case 0x25: // '%'
3490 case 0x3c: // '<'
3491 case 0x3e: // '>'
3492 case 0x26: // '&'
3493 case 0x2c: // ','
3494 case 0x7c: // '|'
3495 case 0x2a: // '*'
3496 case 0x2b: // '+'
3497 case 0x3f: // '?'
3498 case 0x29: // ')'
3499 case 0x3d: // '='
3500 case 0x27: // '\''
3501 case 0x22: // '"'
3502 case 0x5b: // '['
3503 case 0x20: // ' '
3504 case 0x09: // '\t'
3505 case 0x0a: // '\n'
3506 case 0x0d: // '\r'
3507 case 0x3b: // ';'
3508 case 0x2f: // '/'
3509 case -1:
3510 reset();
3511 return intern(buf.toString());
3512 default:
3513 if (!isNameCharacter(c))
3514 error("not a name character",
3515 "U+" + Integer.toHexString(c));
3516 else
3517 buf.append(Character.toChars(c));
3520 while (true);
3524 * Indicates whether the specified Unicode character is an XML 1.1 Char.
3526 private boolean isXML11Char(int c)
3528 return ((c >= 0x0001 && c <= 0xD7FF) ||
3529 (c >= 0xE000 && c < 0xFFFD) || // NB exclude 0xfffd
3530 (c >= 0x10000 && c <= 0x10FFFF));
3534 * Indicates whether the specified Unicode character is an XML 1.1
3535 * RestrictedChar.
3537 private boolean isXML11RestrictedChar(int c)
3539 return ((c >= 0x0001 && c <= 0x0008) ||
3540 (c >= 0x000B && c <= 0x000C) ||
3541 (c >= 0x000E && c <= 0x001F) ||
3542 (c >= 0x007F && c <= 0x0084) ||
3543 (c >= 0x0086 && c <= 0x009F));
3547 * Indicates whether the specified text matches the Name or Nmtoken
3548 * production.
3550 private boolean isNmtoken(String text, boolean isName)
3554 int[] cp = UnicodeReader.toCodePointArray(text);
3555 if (cp.length == 0)
3556 return false;
3557 if (isName)
3559 if (!isNameStartCharacter(cp[0]))
3560 return false;
3562 else
3564 if (!isNameCharacter(cp[0]))
3565 return false;
3567 for (int i = 1; i < cp.length; i++)
3569 if (!isNameCharacter(cp[i]))
3570 return false;
3572 return true;
3574 catch (IOException e)
3576 return false;
3581 * Indicates whether the specified Unicode character is a Name start
3582 * character.
3584 private boolean isNameStartCharacter(int c)
3586 if (input.xml11)
3587 return ((c >= 0x0041 && c <= 0x005a) ||
3588 (c >= 0x0061 && c <= 0x007a) ||
3589 c == 0x3a |
3590 c == 0x5f |
3591 (c >= 0xC0 && c <= 0xD6) ||
3592 (c >= 0xD8 && c <= 0xF6) ||
3593 (c >= 0xF8 && c <= 0x2FF) ||
3594 (c >= 0x370 && c <= 0x37D) ||
3595 (c >= 0x37F && c <= 0x1FFF) ||
3596 (c >= 0x200C && c <= 0x200D) ||
3597 (c >= 0x2070 && c <= 0x218F) ||
3598 (c >= 0x2C00 && c <= 0x2FEF) ||
3599 (c >= 0x3001 && c <= 0xD7FF) ||
3600 (c >= 0xF900 && c <= 0xFDCF) ||
3601 (c >= 0xFDF0 && c <= 0xFFFD) ||
3602 (c >= 0x10000 && c <= 0xEFFFF));
3603 else
3604 return (c == 0x5f || c == 0x3a || isLetter(c));
3608 * Indicates whether the specified Unicode character is a Name non-initial
3609 * character.
3611 private boolean isNameCharacter(int c)
3613 if (input.xml11)
3614 return ((c >= 0x0041 && c <= 0x005a) ||
3615 (c >= 0x0061 && c <= 0x007a) ||
3616 (c >= 0x0030 && c <= 0x0039) ||
3617 c == 0x3a |
3618 c == 0x5f |
3619 c == 0x2d |
3620 c == 0x2e |
3621 c == 0xB7 |
3622 (c >= 0xC0 && c <= 0xD6) ||
3623 (c >= 0xD8 && c <= 0xF6) ||
3624 (c >= 0xF8 && c <= 0x2FF) ||
3625 (c >= 0x300 && c <= 0x37D) ||
3626 (c >= 0x37F && c <= 0x1FFF) ||
3627 (c >= 0x200C && c <= 0x200D) ||
3628 (c >= 0x203F && c <= 0x2040) ||
3629 (c >= 0x2070 && c <= 0x218F) ||
3630 (c >= 0x2C00 && c <= 0x2FEF) ||
3631 (c >= 0x3001 && c <= 0xD7FF) ||
3632 (c >= 0xF900 && c <= 0xFDCF) ||
3633 (c >= 0xFDF0 && c <= 0xFFFD) ||
3634 (c >= 0x10000 && c <= 0xEFFFF));
3635 else
3636 return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3637 isLetter(c) || isDigit(c) ||
3638 isCombiningChar(c) || isExtender(c));
3642 * Indicates whether the specified Unicode character matches the Letter
3643 * production.
 * The first test covers the BaseChar ranges and the second the
 * Ideographic ranges.
 * @param c the code point to test
 * @return true if the code point is in one of the listed ranges
3645 public static boolean isLetter(int c)
3647 if ((c >= 0x0041 && c <= 0x005A) ||
3648 (c >= 0x0061 && c <= 0x007A) ||
3649 (c >= 0x00C0 && c <= 0x00D6) ||
3650 (c >= 0x00D8 && c <= 0x00F6) ||
3651 (c >= 0x00F8 && c <= 0x00FF) ||
3652 (c >= 0x0100 && c <= 0x0131) ||
3653 (c >= 0x0134 && c <= 0x013E) ||
3654 (c >= 0x0141 && c <= 0x0148) ||
3655 (c >= 0x014A && c <= 0x017E) ||
3656 (c >= 0x0180 && c <= 0x01C3) ||
3657 (c >= 0x01CD && c <= 0x01F0) ||
3658 (c >= 0x01F4 && c <= 0x01F5) ||
3659 (c >= 0x01FA && c <= 0x0217) ||
3660 (c >= 0x0250 && c <= 0x02A8) ||
3661 (c >= 0x02BB && c <= 0x02C1) ||
3662 c == 0x0386 ||
3663 (c >= 0x0388 && c <= 0x038A) ||
3664 c == 0x038C ||
3665 (c >= 0x038E && c <= 0x03A1) ||
3666 (c >= 0x03A3 && c <= 0x03CE) ||
3667 (c >= 0x03D0 && c <= 0x03D6) ||
3668 c == 0x03DA ||
3669 c == 0x03DC ||
3670 c == 0x03DE ||
3671 c == 0x03E0 ||
3672 (c >= 0x03E2 && c <= 0x03F3) ||
3673 (c >= 0x0401 && c <= 0x040C) ||
3674 (c >= 0x040E && c <= 0x044F) ||
3675 (c >= 0x0451 && c <= 0x045C) ||
3676 (c >= 0x045E && c <= 0x0481) ||
3677 (c >= 0x0490 && c <= 0x04C4) ||
3678 (c >= 0x04C7 && c <= 0x04C8) ||
3679 (c >= 0x04CB && c <= 0x04CC) ||
3680 (c >= 0x04D0 && c <= 0x04EB) ||
3681 (c >= 0x04EE && c <= 0x04F5) ||
3682 (c >= 0x04F8 && c <= 0x04F9) ||
3683 (c >= 0x0531 && c <= 0x0556) ||
3684 c == 0x0559 ||
3685 (c >= 0x0561 && c <= 0x0586) ||
3686 (c >= 0x05D0 && c <= 0x05EA) ||
3687 (c >= 0x05F0 && c <= 0x05F2) ||
3688 (c >= 0x0621 && c <= 0x063A) ||
3689 (c >= 0x0641 && c <= 0x064A) ||
3690 (c >= 0x0671 && c <= 0x06B7) ||
3691 (c >= 0x06BA && c <= 0x06BE) ||
3692 (c >= 0x06C0 && c <= 0x06CE) ||
3693 (c >= 0x06D0 && c <= 0x06D3) ||
3694 c == 0x06D5 ||
3695 (c >= 0x06E5 && c <= 0x06E6) ||
3696 (c >= 0x0905 && c <= 0x0939) ||
3697 c == 0x093D ||
3698 (c >= 0x0958 && c <= 0x0961) ||
3699 (c >= 0x0985 && c <= 0x098C) ||
3700 (c >= 0x098F && c <= 0x0990) ||
3701 (c >= 0x0993 && c <= 0x09A8) ||
3702 (c >= 0x09AA && c <= 0x09B0) ||
3703 c == 0x09B2 ||
3704 (c >= 0x09B6 && c <= 0x09B9) ||
3705 (c >= 0x09DC && c <= 0x09DD) ||
3706 (c >= 0x09DF && c <= 0x09E1) ||
3707 (c >= 0x09F0 && c <= 0x09F1) ||
3708 (c >= 0x0A05 && c <= 0x0A0A) ||
3709 (c >= 0x0A0F && c <= 0x0A10) ||
3710 (c >= 0x0A13 && c <= 0x0A28) ||
3711 (c >= 0x0A2A && c <= 0x0A30) ||
3712 (c >= 0x0A32 && c <= 0x0A33) ||
3713 (c >= 0x0A35 && c <= 0x0A36) ||
3714 (c >= 0x0A38 && c <= 0x0A39) ||
3715 (c >= 0x0A59 && c <= 0x0A5C) ||
3716 c == 0x0A5E ||
3717 (c >= 0x0A72 && c <= 0x0A74) ||
3718 (c >= 0x0A85 && c <= 0x0A8B) ||
3719 c == 0x0A8D ||
3720 (c >= 0x0A8F && c <= 0x0A91) ||
3721 (c >= 0x0A93 && c <= 0x0AA8) ||
3722 (c >= 0x0AAA && c <= 0x0AB0) ||
3723 (c >= 0x0AB2 && c <= 0x0AB3) ||
3724 (c >= 0x0AB5 && c <= 0x0AB9) ||
3725 c == 0x0ABD ||
3726 c == 0x0AE0 ||
3727 (c >= 0x0B05 && c <= 0x0B0C) ||
3728 (c >= 0x0B0F && c <= 0x0B10) ||
3729 (c >= 0x0B13 && c <= 0x0B28) ||
3730 (c >= 0x0B2A && c <= 0x0B30) ||
3731 (c >= 0x0B32 && c <= 0x0B33) ||
3732 (c >= 0x0B36 && c <= 0x0B39) ||
3733 c == 0x0B3D ||
3734 (c >= 0x0B5C && c <= 0x0B5D) ||
3735 (c >= 0x0B5F && c <= 0x0B61) ||
3736 (c >= 0x0B85 && c <= 0x0B8A) ||
3737 (c >= 0x0B8E && c <= 0x0B90) ||
3738 (c >= 0x0B92 && c <= 0x0B95) ||
3739 (c >= 0x0B99 && c <= 0x0B9A) ||
3740 c == 0x0B9C ||
3741 (c >= 0x0B9E && c <= 0x0B9F) ||
3742 (c >= 0x0BA3 && c <= 0x0BA4) ||
3743 (c >= 0x0BA8 && c <= 0x0BAA) ||
3744 (c >= 0x0BAE && c <= 0x0BB5) ||
3745 (c >= 0x0BB7 && c <= 0x0BB9) ||
3746 (c >= 0x0C05 && c <= 0x0C0C) ||
3747 (c >= 0x0C0E && c <= 0x0C10) ||
3748 (c >= 0x0C12 && c <= 0x0C28) ||
3749 (c >= 0x0C2A && c <= 0x0C33) ||
3750 (c >= 0x0C35 && c <= 0x0C39) ||
3751 (c >= 0x0C60 && c <= 0x0C61) ||
3752 (c >= 0x0C85 && c <= 0x0C8C) ||
3753 (c >= 0x0C8E && c <= 0x0C90) ||
3754 (c >= 0x0C92 && c <= 0x0CA8) ||
3755 (c >= 0x0CAA && c <= 0x0CB3) ||
3756 (c >= 0x0CB5 && c <= 0x0CB9) ||
3757 c == 0x0CDE ||
3758 (c >= 0x0CE0 && c <= 0x0CE1) ||
3759 (c >= 0x0D05 && c <= 0x0D0C) ||
3760 (c >= 0x0D0E && c <= 0x0D10) ||
3761 (c >= 0x0D12 && c <= 0x0D28) ||
3762 (c >= 0x0D2A && c <= 0x0D39) ||
3763 (c >= 0x0D60 && c <= 0x0D61) ||
3764 (c >= 0x0E01 && c <= 0x0E2E) ||
3765 c == 0x0E30 ||
3766 (c >= 0x0E32 && c <= 0x0E33) ||
3767 (c >= 0x0E40 && c <= 0x0E45) ||
3768 (c >= 0x0E81 && c <= 0x0E82) ||
3769 c == 0x0E84 ||
3770 (c >= 0x0E87 && c <= 0x0E88) ||
3771 c == 0x0E8A ||
3772 c == 0x0E8D ||
3773 (c >= 0x0E94 && c <= 0x0E97) ||
3774 (c >= 0x0E99 && c <= 0x0E9F) ||
3775 (c >= 0x0EA1 && c <= 0x0EA3) ||
3776 c == 0x0EA5 ||
3777 c == 0x0EA7 ||
3778 (c >= 0x0EAA && c <= 0x0EAB) ||
3779 (c >= 0x0EAD && c <= 0x0EAE) ||
3780 c == 0x0EB0 ||
3781 (c >= 0x0EB2 && c <= 0x0EB3) ||
3782 c == 0x0EBD ||
3783 (c >= 0x0EC0 && c <= 0x0EC4) ||
3784 (c >= 0x0F40 && c <= 0x0F47) ||
3785 (c >= 0x0F49 && c <= 0x0F69) ||
3786 (c >= 0x10A0 && c <= 0x10C5) ||
3787 (c >= 0x10D0 && c <= 0x10F6) ||
3788 c == 0x1100 ||
3789 (c >= 0x1102 && c <= 0x1103) ||
3790 (c >= 0x1105 && c <= 0x1107) ||
3791 c == 0x1109 ||
3792 (c >= 0x110B && c <= 0x110C) ||
3793 (c >= 0x110E && c <= 0x1112) ||
3794 c == 0x113C ||
3795 c == 0x113E ||
3796 c == 0x1140 ||
3797 c == 0x114C ||
3798 c == 0x114E ||
3799 c == 0x1150 ||
3800 (c >= 0x1154 && c <= 0x1155) ||
3801 c == 0x1159 ||
3802 (c >= 0x115F && c <= 0x1161) ||
3803 c == 0x1163 ||
3804 c == 0x1165 ||
3805 c == 0x1167 ||
3806 c == 0x1169 ||
3807 (c >= 0x116D && c <= 0x116E) ||
3808 (c >= 0x1172 && c <= 0x1173) ||
3809 c == 0x1175 ||
3810 c == 0x119E ||
3811 c == 0x11A8 ||
3812 c == 0x11AB ||
3813 (c >= 0x11AE && c <= 0x11AF) ||
3814 (c >= 0x11B7 && c <= 0x11B8) ||
3815 c == 0x11BA ||
3816 (c >= 0x11BC && c <= 0x11C2) ||
3817 c == 0x11EB ||
3818 c == 0x11F0 ||
3819 c == 0x11F9 ||
3820 (c >= 0x1E00 && c <= 0x1E9B) ||
3821 (c >= 0x1EA0 && c <= 0x1EF9) ||
3822 (c >= 0x1F00 && c <= 0x1F15) ||
3823 (c >= 0x1F18 && c <= 0x1F1D) ||
3824 (c >= 0x1F20 && c <= 0x1F45) ||
3825 (c >= 0x1F48 && c <= 0x1F4D) ||
3826 (c >= 0x1F50 && c <= 0x1F57) ||
3827 c == 0x1F59 ||
3828 c == 0x1F5B ||
3829 c == 0x1F5D ||
3830 (c >= 0x1F5F && c <= 0x1F7D) ||
3831 (c >= 0x1F80 && c <= 0x1FB4) ||
3832 (c >= 0x1FB6 && c <= 0x1FBC) ||
3833 c == 0x1FBE ||
3834 (c >= 0x1FC2 && c <= 0x1FC4) ||
3835 (c >= 0x1FC6 && c <= 0x1FCC) ||
3836 (c >= 0x1FD0 && c <= 0x1FD3) ||
3837 (c >= 0x1FD6 && c <= 0x1FDB) ||
3838 (c >= 0x1FE0 && c <= 0x1FEC) ||
3839 (c >= 0x1FF2 && c <= 0x1FF4) ||
3840 (c >= 0x1FF6 && c <= 0x1FFC) ||
3841 c == 0x2126 ||
3842 (c >= 0x212A && c <= 0x212B) ||
3843 c == 0x212E ||
3844 (c >= 0x2180 && c <= 0x2182) ||
3845 (c >= 0x3041 && c <= 0x3094) ||
3846 (c >= 0x30A1 && c <= 0x30FA) ||
3847 (c >= 0x3105 && c <= 0x312C) ||
3848 (c >= 0xAC00 && c <= 0xD7A3))
3849 return true; // BaseChar
3850 if ((c >= 0x4e00 && c <= 0x9fa5) ||
3851 c == 0x3007 ||
3852 (c >= 0x3021 && c <= 0x3029))
3853 return true; // Ideographic
// neither a BaseChar nor an Ideographic
3854 return false;
3858 * Indicates whether the specified Unicode character matches the Digit
3859 * production.
3861 public static boolean isDigit(int c)
3863 return ((c >= 0x0030 && c <= 0x0039) ||
3864 (c >= 0x0660 && c <= 0x0669) ||
3865 (c >= 0x06F0 && c <= 0x06F9) ||
3866 (c >= 0x0966 && c <= 0x096F) ||
3867 (c >= 0x09E6 && c <= 0x09EF) ||
3868 (c >= 0x0A66 && c <= 0x0A6F) ||
3869 (c >= 0x0AE6 && c <= 0x0AEF) ||
3870 (c >= 0x0B66 && c <= 0x0B6F) ||
3871 (c >= 0x0BE7 && c <= 0x0BEF) ||
3872 (c >= 0x0C66 && c <= 0x0C6F) ||
3873 (c >= 0x0CE6 && c <= 0x0CEF) ||
3874 (c >= 0x0D66 && c <= 0x0D6F) ||
3875 (c >= 0x0E50 && c <= 0x0E59) ||
3876 (c >= 0x0ED0 && c <= 0x0ED9) ||
3877 (c >= 0x0F20 && c <= 0x0F29));
3881 * Indicates whether the specified Unicode character matches the
3882 * CombiningChar production.
3884 public static boolean isCombiningChar(int c)
3886 return ((c >= 0x0300 && c <= 0x0345) ||
3887 (c >= 0x0360 && c <= 0x0361) ||
3888 (c >= 0x0483 && c <= 0x0486) ||
3889 (c >= 0x0591 && c <= 0x05A1) ||
3890 (c >= 0x05A3 && c <= 0x05B9) ||
3891 (c >= 0x05BB && c <= 0x05BD) ||
3892 c == 0x05BF ||
3893 (c >= 0x05C1 && c <= 0x05C2) ||
3894 c == 0x05C4 ||
3895 (c >= 0x064B && c <= 0x0652) ||
3896 c == 0x0670 ||
3897 (c >= 0x06D6 && c <= 0x06DC) ||
3898 (c >= 0x06DD && c <= 0x06DF) ||
3899 (c >= 0x06E0 && c <= 0x06E4) ||
3900 (c >= 0x06E7 && c <= 0x06E8) ||
3901 (c >= 0x06EA && c <= 0x06ED) ||
3902 (c >= 0x0901 && c <= 0x0903) ||
3903 c == 0x093C ||
3904 (c >= 0x093E && c <= 0x094C) ||
3905 c == 0x094D ||
3906 (c >= 0x0951 && c <= 0x0954) ||
3907 (c >= 0x0962 && c <= 0x0963) ||
3908 (c >= 0x0981 && c <= 0x0983) ||
3909 c == 0x09BC ||
3910 c == 0x09BE ||
3911 c == 0x09BF ||
3912 (c >= 0x09C0 && c <= 0x09C4) ||
3913 (c >= 0x09C7 && c <= 0x09C8) ||
3914 (c >= 0x09CB && c <= 0x09CD) ||
3915 c == 0x09D7 ||
3916 (c >= 0x09E2 && c <= 0x09E3) ||
3917 c == 0x0A02 ||
3918 c == 0x0A3C ||
3919 c == 0x0A3E ||
3920 c == 0x0A3F ||
3921 (c >= 0x0A40 && c <= 0x0A42) ||
3922 (c >= 0x0A47 && c <= 0x0A48) ||
3923 (c >= 0x0A4B && c <= 0x0A4D) ||
3924 (c >= 0x0A70 && c <= 0x0A71) ||
3925 (c >= 0x0A81 && c <= 0x0A83) ||
3926 c == 0x0ABC ||
3927 (c >= 0x0ABE && c <= 0x0AC5) ||
3928 (c >= 0x0AC7 && c <= 0x0AC9) ||
3929 (c >= 0x0ACB && c <= 0x0ACD) ||
3930 (c >= 0x0B01 && c <= 0x0B03) ||
3931 c == 0x0B3C ||
3932 (c >= 0x0B3E && c <= 0x0B43) ||
3933 (c >= 0x0B47 && c <= 0x0B48) ||
3934 (c >= 0x0B4B && c <= 0x0B4D) ||
3935 (c >= 0x0B56 && c <= 0x0B57) ||
3936 (c >= 0x0B82 && c <= 0x0B83) ||
3937 (c >= 0x0BBE && c <= 0x0BC2) ||
3938 (c >= 0x0BC6 && c <= 0x0BC8) ||
3939 (c >= 0x0BCA && c <= 0x0BCD) ||
3940 c == 0x0BD7 ||
3941 (c >= 0x0C01 && c <= 0x0C03) ||
3942 (c >= 0x0C3E && c <= 0x0C44) ||
3943 (c >= 0x0C46 && c <= 0x0C48) ||
3944 (c >= 0x0C4A && c <= 0x0C4D) ||
3945 (c >= 0x0C55 && c <= 0x0C56) ||
3946 (c >= 0x0C82 && c <= 0x0C83) ||
3947 (c >= 0x0CBE && c <= 0x0CC4) ||
3948 (c >= 0x0CC6 && c <= 0x0CC8) ||
3949 (c >= 0x0CCA && c <= 0x0CCD) ||
3950 (c >= 0x0CD5 && c <= 0x0CD6) ||
3951 (c >= 0x0D02 && c <= 0x0D03) ||
3952 (c >= 0x0D3E && c <= 0x0D43) ||
3953 (c >= 0x0D46 && c <= 0x0D48) ||
3954 (c >= 0x0D4A && c <= 0x0D4D) ||
3955 c == 0x0D57 ||
3956 c == 0x0E31 ||
3957 (c >= 0x0E34 && c <= 0x0E3A) ||
3958 (c >= 0x0E47 && c <= 0x0E4E) ||
3959 c == 0x0EB1 ||
3960 (c >= 0x0EB4 && c <= 0x0EB9) ||
3961 (c >= 0x0EBB && c <= 0x0EBC) ||
3962 (c >= 0x0EC8 && c <= 0x0ECD) ||
3963 (c >= 0x0F18 && c <= 0x0F19) ||
3964 c == 0x0F35 ||
3965 c == 0x0F37 ||
3966 c == 0x0F39 ||
3967 c == 0x0F3E ||
3968 c == 0x0F3F ||
3969 (c >= 0x0F71 && c <= 0x0F84) ||
3970 (c >= 0x0F86 && c <= 0x0F8B) ||
3971 (c >= 0x0F90 && c <= 0x0F95) ||
3972 c == 0x0F97 ||
3973 (c >= 0x0F99 && c <= 0x0FAD) ||
3974 (c >= 0x0FB1 && c <= 0x0FB7) ||
3975 c == 0x0FB9 ||
3976 (c >= 0x20D0 && c <= 0x20DC) ||
3977 c == 0x20E1 ||
3978 (c >= 0x302A && c <= 0x302F) ||
3979 c == 0x3099 ||
3980 c == 0x309A);
3984 * Indicates whether the specified Unicode character matches the Extender
3985 * production.
3987 public static boolean isExtender(int c)
3989 return (c == 0x00B7 ||
3990 c == 0x02D0 ||
3991 c == 0x02D1 ||
3992 c == 0x0387 ||
3993 c == 0x0640 ||
3994 c == 0x0E46 ||
3995 c == 0x0EC6 ||
3996 c == 0x3005 ||
3997 (c >= 0x3031 && c <= 0x3035) ||
3998 (c >= 0x309D && c <= 0x309E) ||
3999 (c >= 0x30FC && c <= 0x30FE));
4003 * Indicates whether the specified Unicode character matches the Char
4004 * production.
4006 public static boolean isChar(int c)
4008 return (c >= 0x20 && c < 0xd800) ||
4009 (c >= 0xe00 && c < 0xfffd) || // NB exclude 0xfffd
4010 (c >= 0x10000 && c < 0x110000) ||
4011 c == 0xa || c == 0x9 || c == 0xd;
4015 * Interns the specified text or not, depending on the value of
4016 * stringInterning.
4018 private String intern(String text)
4020 return stringInterning ? text.intern() : text;
4024 * Report a parsing error.
4026 private void error(String message)
4027 throws XMLStreamException
4029 error(message, null);
4033 * Report a parsing error.
4035 private void error(String message, Object info)
4036 throws XMLStreamException
4038 if (info != null)
4040 if (info instanceof String)
4041 message += ": \"" + ((String) info) + "\"";
4042 else if (info instanceof Character)
4043 message += ": '" + ((Character) info) + "'";
4045 throw new XMLStreamException(message);
4049 * Perform validation of a start-element event.
4051 private void validateStartElement(String elementName)
4052 throws XMLStreamException
4054 if (currentContentModel == null)
4056 // root element
4057 // VC: Root Element Type
4058 if (!elementName.equals(doctype.rootName))
4059 error("root element name must match name in DTD");
4060 return;
4062 // VC: Element Valid
4063 switch (currentContentModel.type)
4065 case ContentModel.EMPTY:
4066 error("child element found in empty element", elementName);
4067 break;
4068 case ContentModel.ELEMENT:
4069 LinkedList ctx = (LinkedList) validationStack.getLast();
4070 ctx.add(elementName);
4071 break;
4072 case ContentModel.MIXED:
4073 MixedContentModel mm = (MixedContentModel) currentContentModel;
4074 if (!mm.containsName(elementName))
4075 error("illegal element for content model", elementName);
4076 break;
4081 * Perform validation of an end-element event.
4083 private void validateEndElement()
4084 throws XMLStreamException
4086 if (currentContentModel == null)
4088 // root element
4089 // VC: IDREF
4090 if (!idrefs.containsAll(ids))
4091 error("IDREF values must match the value of some ID attribute");
4092 return;
4094 // VC: Element Valid
4095 switch (currentContentModel.type)
4097 case ContentModel.ELEMENT:
4098 LinkedList ctx = (LinkedList) validationStack.getLast();
4099 ElementContentModel ecm = (ElementContentModel) currentContentModel;
4100 validateElementContent(ecm, ctx);
4101 break;
4106 * Perform validation of character data.
4108 private void validatePCData(String text)
4109 throws XMLStreamException
4111 // VC: Element Valid
4112 switch (currentContentModel.type)
4114 case ContentModel.EMPTY:
4115 error("character data found in empty element", text);
4116 break;
4117 case ContentModel.ELEMENT:
4118 boolean white = true;
4119 int len = text.length();
4120 for (int i = 0; i < len; i++)
4122 char c = text.charAt(i);
4123 if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4125 white = false;
4126 break;
4129 if (!white)
4130 error("character data found in element with element content", text);
4131 else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4132 // VC: Standalone Document Declaration
4133 error("whitespace in element content of externally declared " +
4134 "element in standalone document");
4135 break;
4140 * Validates the specified validation context (list of child elements)
4141 * against the element content model for the current element.
4143 private void validateElementContent(ElementContentModel model,
4144 LinkedList children)
4145 throws XMLStreamException
4147 // Use regular expression
4148 StringBuffer buf = new StringBuffer();
4149 for (Iterator i = children.iterator(); i.hasNext(); )
4151 buf.append((String) i.next());
4152 buf.append(' ');
4154 String c = buf.toString();
4155 String regex = createRegularExpression(model);
4156 if (!c.matches(regex))
4157 error("element content "+model.text+" does not match expression "+regex, c);
4161 * Creates the regular expression used to validate an element content
4162 * model.
4164 private String createRegularExpression(ElementContentModel model)
4166 if (model.regex == null)
4168 StringBuffer buf = new StringBuffer();
4169 buf.append('(');
4170 for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4172 ContentParticle cp = (ContentParticle) i.next();
4173 if (cp.content instanceof String)
4175 buf.append('(');
4176 buf.append((String) cp.content);
4177 buf.append(' ');
4178 buf.append(')');
4179 if (cp.max == -1)
4181 if (cp.min == 0)
4182 buf.append('*');
4183 else
4184 buf.append('+');
4186 else if (cp.min == 0)
4187 buf.append('?');
4189 else
4191 ElementContentModel ecm = (ElementContentModel) cp.content;
4192 buf.append(createRegularExpression(ecm));
4194 if (model.or && i.hasNext())
4195 buf.append('|');
4197 buf.append(')');
4198 if (model.max == -1)
4200 if (model.min == 0)
4201 buf.append('*');
4202 else
4203 buf.append('+');
4205 else if (model.min == 0)
4206 buf.append('?');
4207 model.regex = buf.toString();
4209 return model.regex;
4213 * Performs validation of a document type declaration event.
4215 void validateDoctype()
4216 throws XMLStreamException
4218 for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4220 Map.Entry entry = (Map.Entry) i.next();
4221 Object entity = entry.getValue();
4222 if (entity instanceof ExternalIds)
4224 ExternalIds ids = (ExternalIds) entity;
4225 if (ids.notationName != null)
4227 // VC: Notation Declared
4228 ExternalIds notation = doctype.getNotation(ids.notationName);
4229 if (notation == null)
4230 error("Notation name must match the declared name of a " +
4231 "notation", ids.notationName);
4238 * Simple test harness for reading an XML file.
4239 * args[0] is the filename of the XML file
4240 * If args[1] is "-x", enable XInclude processing
4242 public static void main(String[] args)
4243 throws Exception
4245 boolean xIncludeAware = false;
4246 if (args.length > 1 && "-x".equals(args[1]))
4247 xIncludeAware = true;
4248 XMLParser p = new XMLParser(new java.io.FileInputStream(args[0]),
4249 absolutize(null, args[0]),
4250 true, // validating
4251 true, // namespaceAware
4252 true, // coalescing,
4253 true, // replaceERefs
4254 true, // externalEntities
4255 true, // supportDTD
4256 true, // baseAware
4257 true, // stringInterning
4258 null,
4259 null);
4260 XMLStreamReader reader = p;
4261 if (xIncludeAware)
4262 reader = new XIncludeFilter(p, args[0], true, true, true);
4265 int event;
4266 //do
4267 while (reader.hasNext())
4269 event = reader.next();
4270 Location loc = reader.getLocation();
4271 System.out.print(loc.getLineNumber()+":"+loc.getColumnNumber()+" ");
4272 switch (event)
4274 case XMLStreamConstants.START_DOCUMENT:
4275 System.out.println("START_DOCUMENT version="+reader.getVersion()+
4276 " encoding="+reader.getEncoding());
4277 break;
4278 case XMLStreamConstants.END_DOCUMENT:
4279 System.out.println("END_DOCUMENT");
4280 break;
4281 case XMLStreamConstants.START_ELEMENT:
4282 System.out.println("START_ELEMENT "+reader.getName());
4283 int l = reader.getNamespaceCount();
4284 for (int i = 0; i < l; i++)
4285 System.out.println("\tnamespace "+reader.getNamespacePrefix(i)+
4286 "='"+reader.getNamespaceURI(i)+"'");
4287 l = reader.getAttributeCount();
4288 for (int i = 0; i < l; i++)
4289 System.out.println("\tattribute "+reader.getAttributeQName(i)+
4290 "='"+reader.getAttributeValue(i)+"'");
4291 break;
4292 case XMLStreamConstants.END_ELEMENT:
4293 System.out.println("END_ELEMENT "+reader.getName());
4294 break;
4295 case XMLStreamConstants.CHARACTERS:
4296 System.out.println("CHARACTERS '"+encodeText(reader.getText())+"'");
4297 break;
4298 case XMLStreamConstants.CDATA:
4299 System.out.println("CDATA '"+encodeText(reader.getText())+"'");
4300 break;
4301 case XMLStreamConstants.SPACE:
4302 System.out.println("SPACE '"+encodeText(reader.getText())+"'");
4303 break;
4304 case XMLStreamConstants.DTD:
4305 System.out.println("DTD "+reader.getText());
4306 break;
4307 case XMLStreamConstants.ENTITY_REFERENCE:
4308 System.out.println("ENTITY_REFERENCE "+reader.getText());
4309 break;
4310 case XMLStreamConstants.COMMENT:
4311 System.out.println("COMMENT '"+encodeText(reader.getText())+"'");
4312 break;
4313 case XMLStreamConstants.PROCESSING_INSTRUCTION:
4314 System.out.println("PROCESSING_INSTRUCTION "+reader.getPITarget()+
4315 " "+reader.getPIData());
4316 break;
4317 case XMLStreamConstants.START_ENTITY:
4318 System.out.println("START_ENTITY "+reader.getText());
4319 break;
4320 case XMLStreamConstants.END_ENTITY:
4321 System.out.println("END_ENTITY "+reader.getText());
4322 break;
4323 default:
4324 System.out.println("Unknown event: "+event);
4328 catch (XMLStreamException e)
4330 Location l = reader.getLocation();
4331 System.out.println("At line "+l.getLineNumber()+
4332 ", column "+l.getColumnNumber()+
4333 " of "+l.getLocationURI());
4334 throw e;
4339 * Escapes control characters in the specified text. For debugging.
4341 private static String encodeText(String text)
4343 StringBuffer b = new StringBuffer();
4344 int len = text.length();
4345 for (int i = 0; i < len; i++)
4347 char c = text.charAt(i);
4348 switch (c)
4350 case '\t':
4351 b.append("\\t");
4352 break;
4353 case '\n':
4354 b.append("\\n");
4355 break;
4356 case '\r':
4357 b.append("\\r");
4358 break;
4359 default:
4360 b.append(c);
4363 return b.toString();
4367 * An attribute instance.
4369 class Attribute
4373 * Attribute name.
4375 final String name;
4378 * Attribute type as declared in the DTD, or CDATA otherwise.
4380 final String type;
4383 * Whether the attribute was specified or defaulted.
4385 final boolean specified;
4388 * The attribute value.
4390 final String value;
4393 * The namespace prefix.
4395 final String prefix;
4398 * The namespace local-name.
4400 final String localName;
4402 Attribute(String name, String type, boolean specified, String value)
4404 this.name = name;
4405 this.type = type;
4406 this.specified = specified;
4407 this.value = value;
4408 int ci = name.indexOf(':');
4409 if (ci == -1)
4411 prefix = null;
4412 localName = intern(name);
4414 else
4416 prefix = intern(name.substring(0, ci));
4417 localName = intern(name.substring(ci + 1));
4421 public boolean equals(Object other)
4423 if (other instanceof Attribute)
4425 Attribute a = (Attribute) other;
4426 if (namespaceAware)
4428 if (!a.localName.equals(localName))
4429 return false;
4430 String auri = getNamespaceURI(a.prefix);
4431 String uri = getNamespaceURI(prefix);
4432 if (uri == null && (auri == null ||
4433 (input.xml11 && "".equals(auri))))
4434 return true;
4435 if (uri != null)
4437 if ("".equals(uri) && input.xml11 && "".equals(auri))
4438 return true;
4439 return uri.equals(auri);
4441 return false;
4443 else
4444 return a.name.equals(name);
4446 return false;
4452 * Representation of a DTD.
4454 class Doctype
4458 * Name of the root element.
4460 final String rootName;
4463 * Public ID, if any, of external subset.
4465 final String publicId;
4468 * System ID (URL), if any, of external subset.
4470 final String systemId;
4473 * Map of element names to content models.
4475 private final LinkedHashMap elements = new LinkedHashMap();
4478 * Map of element names to maps of attribute declarations.
4480 private final LinkedHashMap attlists = new LinkedHashMap();
4483 * Map of entity names to entities (String or ExternalIds).
4485 private final LinkedHashMap entities = new LinkedHashMap();
4488 * Map of notation names to ExternalIds.
4490 private final LinkedHashMap notations = new LinkedHashMap();
4493 * Map of anonymous keys to comments.
4495 private final LinkedHashMap comments = new LinkedHashMap();
4498 * Map of anonymous keys to processing instructions (String[2]
4499 * containing {target, data}).
4501 private final LinkedHashMap pis = new LinkedHashMap();
4504 * List of keys to all markup entries in the DTD.
4506 private final LinkedList entries = new LinkedList();
4509 * Set of the entities defined in the external subset.
4511 private final HashSet externalEntities = new HashSet();
4514 * Set of the notations defined in the external subset.
4516 private final HashSet externalNotations = new HashSet();
4519 * Counter for making anonymous keys.
4521 private int anon = 1;
4524 * Constructor.
4526 Doctype(String rootName, String publicId, String systemId)
4528 this.rootName = rootName;
4529 this.publicId = publicId;
4530 this.systemId = systemId;
4534 * Adds an element declaration.
4535 * @param name the element name
4536 * @param text the content model text
4537 * @param model the parsed content model
4539 void addElementDecl(String name, String text, ContentModel model)
4541 if (elements.containsKey(name))
4542 return;
4543 model.text = text;
4544 model.external = (inputStack.size() != 1);
4545 elements.put(name, model);
4546 entries.add("E" + name);
4550 * Adds an attribute declaration.
4551 * @param ename the element name
4552 * @param aname the attribute name
4553 * @param decl the attribute declaration details
4555 void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4557 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4558 if (attlist == null)
4560 attlist = new LinkedHashMap();
4561 attlists.put(ename, attlist);
4563 else if (attlist.containsKey(aname))
4564 return;
4565 attlist.put(aname, decl);
4566 String key = "A" + ename;
4567 if (!entries.contains(key))
4568 entries.add(key);
4572 * Adds an entity declaration.
4573 * @param name the entity name
4574 * @param text the entity replacement text
4575 * @param inExternalSubset if we are in the exernal subset
4577 void addEntityDecl(String name, String text, boolean inExternalSubset)
4579 if (entities.containsKey(name))
4580 return;
4581 entities.put(name, text);
4582 entries.add("e" + name);
4583 if (inExternalSubset)
4584 externalEntities.add(name);
4588 * Adds an entity declaration.
4589 * @param name the entity name
4590 * @param ids the external IDs
4591 * @param inExternalSubset if we are in the exernal subset
4593 void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4595 if (entities.containsKey(name))
4596 return;
4597 entities.put(name, ids);
4598 entries.add("e" + name);
4599 if (inExternalSubset)
4600 externalEntities.add(name);
4604 * Adds a notation declaration.
4605 * @param name the notation name
4606 * @param ids the external IDs
4607 * @param inExternalSubset if we are in the exernal subset
4609 void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4611 if (notations.containsKey(name))
4612 return;
4613 notations.put(name, ids);
4614 entries.add("n" + name);
4615 if (inExternalSubset)
4616 externalNotations.add(name);
4620 * Adds a comment.
4622 void addComment(String text)
4624 String key = Integer.toString(anon++);
4625 comments.put(key, text);
4626 entries.add("c" + key);
4630 * Adds a processing instruction.
4632 void addPI(String target, String data)
4634 String key = Integer.toString(anon++);
4635 pis.put(key, new String[] {target, data});
4636 entries.add("p" + key);
4640 * Returns the content model for the specified element.
4641 * @param name the element name
4643 ContentModel getElementModel(String name)
4645 return (ContentModel) elements.get(name);
4649 * Returns the attribute definition for the given attribute
4650 * @param ename the element name
4651 * @param aname the attribute name
4653 AttributeDecl getAttributeDecl(String ename, String aname)
4655 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4656 return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4660 * Indicates whether the specified attribute was declared in the DTD.
4661 * @param ename the element name
4662 * @param aname the attribute name
4664 boolean isAttributeDeclared(String ename, String aname)
4666 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4667 return (attlist == null) ? false : attlist.containsKey(aname);
4671 * Returns an iterator over the entries in the attribute list for the
4672 * given element.
4673 * @param ename the element name
4675 Iterator attlistIterator(String ename)
4677 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4678 return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4679 attlist.entrySet().iterator();
4683 * Returns the entity (String or ExternalIds) for the given entity name.
4685 Object getEntity(String name)
4687 return entities.get(name);
4691 * Indicates whether the specified entity was declared in the external
4692 * subset.
4694 boolean isEntityExternal(String name)
4696 return externalEntities.contains(name);
4700 * Returns an iterator over the entity map entries.
4702 Iterator entityIterator()
4704 return entities.entrySet().iterator();
4708 * Returns the notation IDs for the given notation name.
4710 ExternalIds getNotation(String name)
4712 return (ExternalIds) notations.get(name);
4716 * Indicates whether the specified notation was declared in the external
4717 * subset.
4719 boolean isNotationExternal(String name)
4721 return externalNotations.contains(name);
4725 * Returns the comment associated with the specified (anonymous) key.
4727 String getComment(String key)
4729 return (String) comments.get(key);
4733 * Returns the processing instruction associated with the specified
4734 * (anonymous) key.
4736 String[] getPI(String key)
4738 return (String[]) pis.get(key);
4742 * Returns an iterator over the keys of the markup entries in this DTD,
4743 * in the order declared.
4745 Iterator entryIterator()
4747 return entries.iterator();
/**
 * Holder for the identifiers of an external ID together with the
 * notation name of an optional NDataDecl. All fields start out null.
 */
class ExternalIds
{

  /** The public ID. */
  String publicId;

  /** The system ID. */
  String systemId;

  /** The notation name declared with the NDATA keyword. */
  String notationName;

}
/**
 * Base class of all content models.
 * A model has a fixed type (one of the constants below) and occurrence
 * bounds that default to exactly once; subclasses adjust the bounds.
 */
abstract class ContentModel
{

  /** Model type constants. */
  static final int EMPTY = 0;
  static final int ANY = 1;
  static final int ELEMENT = 2;
  static final int MIXED = 3;

  /** Minimum number of occurrences. */
  int min = 1;

  /** Maximum number of occurrences. */
  int max = 1;

  /** One of EMPTY, ANY, ELEMENT or MIXED. */
  final int type;

  /** The content model text. */
  String text;

  /** External flag; not set by this class itself. */
  boolean external;

  /**
   * @param type the model type constant
   */
  ContentModel(int type)
  {
    this.type = type;
  }

}
4800 * The EMPTY content model.
4802 class EmptyContentModel
4803 extends ContentModel
4806 EmptyContentModel()
4808 super(ContentModel.EMPTY);
4809 min = 0;
4810 max = 0;
4816 * The ANY content model.
4818 class AnyContentModel
4819 extends ContentModel
4822 AnyContentModel()
4824 super(ContentModel.ANY);
4825 min = 0;
4826 max = -1;
4832 * An element content model.
4834 class ElementContentModel
4835 extends ContentModel
4838 LinkedList contentParticles;
4839 boolean or;
4840 String regex; // regular expression cache
4842 ElementContentModel()
4844 super(ContentModel.ELEMENT);
4845 contentParticles = new LinkedList();
4848 void addContentParticle(ContentParticle cp)
4850 contentParticles.add(cp);
/**
 * One item of an element content model, with its own occurrence bounds
 * (both default to one).
 */
class ContentParticle
{

  /** Minimum number of occurrences. */
  int min;

  /** Maximum number of occurrences. */
  int max;

  /** Name (String) or ElementContentModel */
  Object content;

  ContentParticle()
  {
    min = 1;
    max = 1;
  }

}
4865 * A mixed content model.
4867 class MixedContentModel
4868 extends ContentModel
4871 private HashSet names;
4873 MixedContentModel()
4875 super(ContentModel.MIXED);
4876 names = new HashSet();
4879 void addName(String name)
4881 names.add(name);
4884 boolean containsName(String name)
4886 return names.contains(name);
/**
 * Immutable record of one attribute definition.
 */
class AttributeDecl
{

  /** The attribute type (CDATA, ID, etc). */
  final String type;

  /** The default value. */
  final String value;

  /** The value type (#FIXED, #IMPLIED, etc). */
  final int valueType;

  /** The enumeration text. */
  final String enumeration;

  /** The enumeration tokens. */
  final HashSet values;

  /** Whether this attribute declaration occurred in the external subset. */
  final boolean external;

  /**
   * Stores the given details unchanged; the token set is not copied.
   *
   * @param type the attribute type
   * @param value the default value
   * @param valueType the value type
   * @param enumeration the enumeration text
   * @param values the enumeration tokens
   * @param external whether declared in the external subset
   */
  AttributeDecl(String type, String value,
                int valueType, String enumeration,
                HashSet values, boolean external)
  {
    this.external = external;
    this.values = values;
    this.enumeration = enumeration;
    this.valueType = valueType;
    this.value = value;
    this.type = type;
  }

}
/**
 * Extension of XMLResolver for resolvers that can locate a resource from
 * its public ID as well as from its system ID (URL).
 */
interface XMLResolver2
  extends XMLResolver
{

  /**
   * Resolves the resource identified by the given IDs.
   *
   * @param publicId the public ID
   * @param systemId the system ID
   * @return a stream for the resource
   * @throws XMLStreamException if resolution fails
   */
  InputStream resolve(String publicId, String systemId)
    throws XMLStreamException;

}
4955 * An XML input source.
4957 static class Input
4958 implements Location
4961 int line = 1, markLine;
4962 int column, markColumn;
4963 int offset, markOffset;
4964 final String publicId, systemId, name;
4965 final boolean report; // report start- and end-entity
4966 final boolean normalize; // normalize CR, etc to LF
4968 InputStream in;
4969 Reader reader;
4970 UnicodeReader unicodeReader;
4971 boolean initialized;
4972 String inputEncoding;
4973 boolean xml11;
4975 Input(InputStream in, Reader reader, String publicId, String systemId,
4976 String name, String inputEncoding, boolean report,
4977 boolean normalize)
4979 if (inputEncoding == null)
4980 inputEncoding = "UTF-8";
4981 this.inputEncoding = inputEncoding;
4982 this.publicId = publicId;
4983 this.systemId = systemId;
4984 this.name = name;
4985 this.report = report;
4986 this.normalize = normalize;
4987 if (in != null)
4989 if (reader != null)
4990 throw new IllegalStateException("both byte and char streams "+
4991 "specified");
4992 if (normalize)
4993 in = new CRLFInputStream(in);
4994 in = new BufferedInputStream(in);
4995 this.in = in;
4997 else
4999 this.reader = normalize ? new CRLFReader(reader) : reader;
5000 unicodeReader = new UnicodeReader(this.reader);
5002 initialized = false;
5005 // -- Location --
5007 public int getCharacterOffset()
5009 return offset;
5012 public int getColumnNumber()
5014 return column;
5017 public int getLineNumber()
5019 return line;
5022 public String getLocationURI()
5024 return systemId;
5027 void init()
5028 throws IOException
5030 if (initialized)
5031 return;
5032 if (in != null)
5033 detectEncoding();
5034 initialized = true;
5037 void mark(int len)
5038 throws IOException
5040 //System.out.println(" mark:"+len);
5041 markOffset = offset;
5042 markLine = line;
5043 markColumn = column;
5044 if (unicodeReader != null)
5045 unicodeReader.mark(len);
5046 else
5047 in.mark(len);
5051 * Character read.
5053 int read()
5054 throws IOException
5056 offset++;
5057 int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5058 if (normalize &&
5059 (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5061 // Normalize CR etc to LF
5062 ret = 0x0a;
5064 // Locator handling
5065 if (ret == 0x0a)
5067 line++;
5068 column = 0;
5070 else
5071 column++;
5072 return ret;
5076 * Block read.
5078 int read(int[] b, int off, int len)
5079 throws IOException
5081 int ret;
5082 if (unicodeReader != null)
5083 ret = unicodeReader.read(b, off, len);
5084 else
5086 byte[] b2 = new byte[len];
5087 ret = in.read(b2, 0, len);
5088 if (ret != -1)
5090 String s = new String(b2, 0, ret, inputEncoding);
5091 int[] c = UnicodeReader.toCodePointArray(s);
5092 ret = c.length;
5093 System.arraycopy(c, 0, b, off, ret);
5096 if (ret != -1)
5098 // Locator handling
5099 for (int i = 0; i < ret; i++)
5101 int c = b[off + i];
5102 if (normalize &&
5103 (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5105 // Normalize CR etc to LF
5106 c = 0x0a;
5107 b[off + i] = c;
5109 if (c == 0x0a)
5111 line++;
5112 column = 0;
5114 else
5115 column++;
5118 return ret;
5121 void reset()
5122 throws IOException
5124 if (unicodeReader != null)
5125 unicodeReader.reset();
5126 else
5127 in.reset();
5128 offset = markOffset;
5129 line = markLine;
5130 column = markColumn;
5133 // Detection of input encoding
5135 private static final int[] SIGNATURE_UCS_4_1234 =
5136 new int[] { 0x00, 0x00, 0x00, 0x3c };
5137 private static final int[] SIGNATURE_UCS_4_4321 =
5138 new int[] { 0x3c, 0x00, 0x00, 0x00 };
5139 private static final int[] SIGNATURE_UCS_4_2143 =
5140 new int[] { 0x00, 0x00, 0x3c, 0x00 };
5141 private static final int[] SIGNATURE_UCS_4_3412 =
5142 new int[] { 0x00, 0x3c, 0x00, 0x00 };
5143 private static final int[] SIGNATURE_UCS_2_12 =
5144 new int[] { 0xfe, 0xff };
5145 private static final int[] SIGNATURE_UCS_2_21 =
5146 new int[] { 0xff, 0xfe };
5147 private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5148 new int[] { 0x00, 0x3c, 0x00, 0x3f };
5149 private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5150 new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5151 private static final int[] SIGNATURE_UTF_8 =
5152 new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5153 private static final int[] SIGNATURE_UTF_8_BOM =
5154 new int[] { 0xef, 0xbb, 0xbf };
5157 * Detect the input encoding.
5159 private void detectEncoding()
5160 throws IOException
5162 int[] signature = new int[4];
5163 in.mark(4);
5164 for (int i = 0; i < 4; i++)
5165 signature[i] = in.read();
5166 in.reset();
5168 // 4-byte encodings
5169 if (equals(SIGNATURE_UCS_4_1234, signature))
5171 in.read();
5172 in.read();
5173 in.read();
5174 in.read();
5175 setInputEncoding("UTF-32BE");
5177 else if (equals(SIGNATURE_UCS_4_4321, signature))
5179 in.read();
5180 in.read();
5181 in.read();
5182 in.read();
5183 setInputEncoding("UTF-32LE");
5185 else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5186 equals(SIGNATURE_UCS_4_3412, signature))
5187 throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5189 // 2-byte encodings
5190 else if (equals(SIGNATURE_UCS_2_12, signature))
5192 in.read();
5193 in.read();
5194 setInputEncoding("UTF-16BE");
5196 else if (equals(SIGNATURE_UCS_2_21, signature))
5198 in.read();
5199 in.read();
5200 setInputEncoding("UTF-16LE");
5202 else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5204 //setInputEncoding("UTF-16BE");
5205 throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5207 else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5209 //setInputEncoding("UTF-16LE");
5210 throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5212 // ASCII-derived encodings
5213 else if (equals(SIGNATURE_UTF_8, signature))
5215 // UTF-8 input encoding implied, TextDecl
5217 else if (equals(SIGNATURE_UTF_8_BOM, signature))
5219 in.read();
5220 in.read();
5221 in.read();
5222 setInputEncoding("UTF-8");
5226 private static boolean equals(int[] b1, int[] b2)
5228 for (int i = 0; i < b1.length; i++)
5230 if (b1[i] != b2[i])
5231 return false;
5233 return true;
5236 void setInputEncoding(String encoding)
5237 throws IOException
5239 if (encoding.equals(inputEncoding))
5240 return;
5241 if ("UTF-16".equalsIgnoreCase(encoding) &&
5242 inputEncoding.startsWith("UTF-16"))
5243 return;
5244 if (reader != null)
5245 throw new UnsupportedEncodingException("document is not in its " +
5246 "declared encoding " +
5247 inputEncoding +
5248 ": " + encoding);
5249 inputEncoding = encoding;
5250 finalizeEncoding();
5253 void finalizeEncoding()
5254 throws IOException
5256 if (reader != null)
5257 return;
5258 reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5259 unicodeReader = new UnicodeReader(reader);
5260 mark(1);