1 /* ValidationConsumer.java --
2 Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu
.xml
.pipeline
;
40 import java
.io
.IOException
;
41 import java
.io
.StringReader
;
42 import java
.io
.StringWriter
;
43 import java
.util
.EmptyStackException
;
44 import java
.util
.Enumeration
;
45 import java
.util
.Hashtable
;
46 import java
.util
.Stack
;
47 import java
.util
.StringTokenizer
;
48 import java
.util
.Vector
;
50 import org
.xml
.sax
.Attributes
;
51 import org
.xml
.sax
.EntityResolver
;
52 import org
.xml
.sax
.ErrorHandler
;
53 import org
.xml
.sax
.InputSource
;
54 import org
.xml
.sax
.Locator
;
55 import org
.xml
.sax
.SAXException
;
56 import org
.xml
.sax
.SAXParseException
;
57 import org
.xml
.sax
.XMLReader
;
58 import org
.xml
.sax
.helpers
.XMLReaderFactory
;
61 * This class checks SAX2 events to report validity errors; it works as
62 * both a filter and a terminus on an event pipeline. It relies on the
63 * producer of SAX events to: </p> <ol>
65 * <li> Conform to the specification of a non-validating XML parser that
66 * reads all external entities, reported using SAX2 events. </li>
68 * <li> Report ignorable whitespace as such (through the ContentHandler
69 * interface). This is, strictly speaking, optional for nonvalidating
70 * XML processors. </li>
72 * <li> Make SAX2 DeclHandler callbacks, with default
73 * attribute values already normalized (and without "<").</li>
75 * <li> Make SAX2 LexicalHandler startDTD() and endDTD ()
78 * <li> Act as if the <em>(URI)/namespace-prefixes</em> property were
79 * set to true, by providing XML 1.0 names and all <code>xmlns*</code>
80 * attributes (rather than omitting either or both). </li>
84 * <p> At this writing, the major SAX2 parsers (such as Ælfred2,
85 * Crimson, and Xerces) meet these requirements, and this validation
86 * module is used by the optional Ælfred2 validation support.
89 * <p> Note that because this is a layered validator, it has to duplicate some
90 * work that the parser is doing; there are also other costs to layering.
91 * However, <em>because of layering it doesn't need a parser</em> in order
92 * to work! You can use it with anything that generates SAX events, such
93 * as an application component that wants to detect invalid content in
94 * a changed area without validating an entire document, or which wants to
95 * ensure that it doesn't write invalid data to a communications partner.</p>
97 * <p> Also, note that because this is a layered validator, the line numbers
98 * reported for some errors may seem strange. For example, if an element does
99 * not permit character content, the validator
100 * will use the locator provided to it.
101 * That might reflect the last character of a <em>characters</em> event
102 * callback, rather than the first non-whitespace character. </p>
107 * <p> Of interest is the fact that unlike most currently known XML validators,
108 * this one can report some cases of non-determinism in element content models.
109 * It is a compile-time option (<code>warnNonDeterministic</code>), currently disabled. This will only report
110 * such XML errors if they relate to content actually appearing in a document;
111 * content models aren't aggressively scanned for non-deterministic structure.
112 * Documents which trigger such non-deterministic transitions may be handled
113 * differently by different validating parsers, without losing conformance
114 * to the XML specification. </p>
117 * <p> Current limitations of the validation performed are in roughly three
120 * <p> The first category represents constraints which demand violations
121 * of software layering: exposing lexical details, one of the first things
122 * that <em>application</em> programming interfaces (APIs) hide. These
123 * invariably relate to XML entity handling, and to historical oddities
124 * of the XML validation semantics. Curiously,
125 * recent (Autumn 1999) conformance testing showed that these constraints are
126 * among those handled worst by existing XML validating parsers. Arguments
127 * have been made that each of these VCs should be turned into WFCs (most
128 * of them) or discarded (popular for the standalone declaration); in short,
129 * that these are bugs in the XML specification (not all via SGML): </p><ul>
131 * <li> The <em>Proper Declaration/PE Nesting</em> and
132 * <em>Proper Group/PE Nesting</em> VCs can't be tested because they
133 * require access to particularly low level lexical level information.
134 * In essence, the reason XML isn't a simple thing to parse is that
135 * it's not a context free grammar, and these constraints elevate that
136 * SGML-derived context sensitivity to the level of a semantic rule.
138 * <li> The <em>Standalone Document Declaration</em> VC can't be
139 * tested. This is for two reasons. First, this flag isn't made
140 * available through SAX2. Second, it also requires breaking that
141 * lexical layering boundary. (If you ever wondered why classes
142 * in compiler construction or language design barely mention the
143 * existence of context-sensitive grammars, it's because of messy
144 * issues like these.)
146 * <li> The <em>Entity Declared</em> VC can't be tested, because it
147 * also requires breaking that lexical layering boundary! There's also
148 * another issue: the VC wording (and seemingly intent) is ambiguous.
149 * (This is still true in the "Second edition" XML spec.)
150 * Since there is a WFC of the same name, everyone's life would be
151 * easier if references to undeclared parsed entities were always well
152 * formedness errors, regardless of whether they're parameter entities
153 * or not. (Note that nonvalidating parsers are not required
154 * to report all such well formedness errors if they don't read external
155 * parameter entities, although currently most XML parsers read them
156 * in an attempt to avoid problems from inconsistent parser behavior.)
160 * <p> The second category of limitations on this validation represent
161 * constraints associated with information that is not guaranteed to be
162 * available (or in one case, <em>is guaranteed not to be available</em>),
163 * through the SAX2 API: </p><ul>
165 * <li> The <em>Unique Element Type Declaration</em> VC may not be
166 * reportable, if the underlying parser happens not to expose
167 * multiple declarations. (Ælfred2 reports these validity
168 * errors directly.)</li>
170 * <li> Similarly, the <em>Unique Notation Name</em> VC, added in the
171 * 14-January-2000 XML spec errata to restrict typing models used by
172 * elements, may not be reportable. (Ælfred reports these
173 * validity errors directly.) </li>
177 * <p> A third category relates to ease of implementation. (Think of this
178 * as "bugs".) The most notable issue here is character handling. Rather
179 * than attempting to implement the voluminous character tables in the XML
180 * specification (Appendix B), Unicode rules are used directly from
181 * the java.lang.Character class. Recent JVMs have begun to diverge from
182 * the original specification for that class (Unicode 2.0), meaning that
183 * different JVMs may handle that aspect of conformance differently.
186 * <p> Note that for some of the validity errors that SAX2 does not
187 * expose, a nonvalidating parser is permitted (by the XML specification)
188 * to report validity errors. When used with a parser that does so for
189 * the validity constraints mentioned above (or any other SAX2 event
190 * stream producer that does the same thing), overall conformance is
191 * substantially improved.
193 * @see gnu.xml.aelfred2.SAXDriver
194 * @see gnu.xml.aelfred2.XmlReader
196 * @author David Brownell
198 public final class ValidationConsumer
extends EventFilter
200 // report error if we happen to notice a non-deterministic choice?
201 // we won't report buggy content models; just buggy instances
202 private static final boolean warnNonDeterministic
= false;
204 // for tracking active content models
205 private String rootName
;
206 private Stack contentStack
= new Stack ();
208 // flags for "saved DTD" processing
209 private boolean disableDeclarations
;
210 private boolean disableReset
;
213 // most VCs get tested when we see element start tags. the per-element
214 // info (including attributes) recorded here duplicates that found inside
215 // many nonvalidating parsers, hence dual lookups etc ... that's why a
216 // layered validator isn't going to be as fast as a non-layered one.
219 // key = element name; value = ElementInfo
220 private Hashtable elements
= new Hashtable ();
222 // some VCs relate to ID/IDREF/IDREFS attributes
223 // key = id; value = boolean true (defd) or false (refd)
224 private Hashtable ids
= new Hashtable ();
226 // we just record declared notation and unparsed entity names.
227 // the implementation here is simple/slow; these features
228 // are seldom used, one hopes they'll wither away soon
229 private Vector notations
= new Vector (5, 5);
230 private Vector nDeferred
= new Vector (5, 5);
231 private Vector unparsed
= new Vector (5, 5);
232 private Vector uDeferred
= new Vector (5, 5);
234 // note: DocBk 3.1.7 XML defines over 2 dozen notations,
235 // used when defining unparsed entities for graphics
236 // (and maybe in other places)
241 * Creates a pipeline terminus which consumes all events passed to
242 * it; this will report validity errors as if they were fatal errors,
243 * unless an error handler is assigned.
245 * @see #setErrorHandler
247 // constructor used by PipelineFactory
248 // ... and want one taking system ID of an external subset
249 public ValidationConsumer ()
255 * Creates a pipeline filter which reports validity errors and then
256 * passes events on to the next consumer if they were not fatal.
258 * @see #setErrorHandler
260 // constructor used by PipelineFactory
261 // ... and want one taking system ID of an external subset
262 // (which won't send declaration events)
263 public ValidationConsumer (EventConsumer next
)
267 setContentHandler (this);
268 setDTDHandler (this);
269 try { setProperty (DECL_HANDLER
, this); }
270 catch (Exception e
) { /* "can't happen" */ }
271 try { setProperty (LEXICAL_HANDLER
, this); }
272 catch (Exception e
) { /* "can't happen" */ }
276 private static final String fakeRootName
277 = ":Nobody:in:their_Right.Mind_would:use:this-name:1x:";
280 * Creates a validation consumer which is preloaded with the DTD provided.
281 * It does this by constructing a document with that DTD, then parsing
282 * that document and recording its DTD declarations. Then it arranges
283 * not to modify that information.
285 * <p> The resulting validation consumer will only validate against
286 * the specified DTD, regardless of whether some other DTD is found
287 * in a document being parsed.
289 * @param rootName The name of the required root element; if this is
290 * null, any root element name will be accepted.
291 * @param publicId If non-null and there is a non-null systemId, this
292 * identifier provides an alternate access identifier for the DTD's
294 * @param systemId If non-null, this is a URI (normally URL) that
295 * may be used to access the DTD's external subset.
296 * @param internalSubset If non-null, holds literal markup declarations
297 * comprising the DTD's internal subset.
298 * @param resolver If non-null, this will be provided to the parser for
299 * use when resolving parameter entities (including any external subset).
302 * @param minimalDocument If non-null, a minimal valid document.
304 * @exception SAXNotSupportedException If the default SAX parser does
305 * not support the standard lexical or declaration handlers.
306 * @exception SAXParseException If the specified DTD has either
307 * well-formedness or validity errors
308 * @exception IOException If the specified DTD can't be read for
311 public ValidationConsumer (
315 String internalSubset
,
316 EntityResolver resolver
,
317 String minimalDocument
318 ) throws SAXException
, IOException
323 if (rootName
== null)
324 rootName
= fakeRootName
;
327 // Synthesize document with that DTD; is it possible to do
328 // better for the declaration of the root element?
330 // NOTE: can't use SAX2 to write internal subsets.
332 StringWriter writer
= new StringWriter ();
334 writer
.write ("<!DOCTYPE ");
335 writer
.write (rootName
);
336 if (systemId
!= null) {
337 writer
.write ("\n ");
338 if (publicId
!= null) {
339 writer
.write ("PUBLIC '");
340 writer
.write (publicId
);
341 writer
.write ("'\n\t'");
343 writer
.write ("SYSTEM '");
344 writer
.write (systemId
);
347 writer
.write (" [ ");
348 if (rootName
== fakeRootName
) {
349 writer
.write ("\n<!ELEMENT ");
350 writer
.write (rootName
);
351 writer
.write (" EMPTY>");
353 if (internalSubset
!= null)
354 writer
.write (internalSubset
);
355 writer
.write ("\n ]>");
357 if (minimalDocument
!= null) {
359 writer
.write (minimalDocument
);
363 writer
.write (rootName
);
364 writer
.write ("/>\n");
366 minimalDocument
= writer
.toString ();
373 producer
= XMLReaderFactory
.createXMLReader ();
374 bind (producer
, this);
376 if (resolver
!= null)
377 producer
.setEntityResolver (resolver
);
381 in
= new InputSource (new StringReader (minimalDocument
));
384 disableDeclarations
= true;
385 if (rootName
== fakeRootName
)
386 this.rootName
= null;
389 private void resetState ()
393 contentStack
.removeAllElements ();
397 notations
.removeAllElements ();
398 nDeferred
.removeAllElements ();
399 unparsed
.removeAllElements ();
400 uDeferred
.removeAllElements ();
405 private void warning (String description
)
408 ErrorHandler errHandler
= getErrorHandler ();
409 Locator locator
= getDocumentLocator ();
410 SAXParseException err
;
412 if (errHandler
== null)
416 err
= new SAXParseException (description
, null, null, -1, -1);
418 err
= new SAXParseException (description
, locator
);
419 errHandler
.warning (err
);
422 // package private (for ChildrenRecognizer)
423 private void error (String description
)
426 ErrorHandler errHandler
= getErrorHandler ();
427 Locator locator
= getDocumentLocator ();
428 SAXParseException err
;
431 err
= new SAXParseException (description
, null, null, -1, -1);
433 err
= new SAXParseException (description
, locator
);
434 if (errHandler
!= null)
435 errHandler
.error (err
);
436 else // else we always treat it as fatal!
440 private void fatalError (String description
)
443 ErrorHandler errHandler
= getErrorHandler ();
444 Locator locator
= getDocumentLocator ();
445 SAXParseException err
;
448 err
= new SAXParseException (description
, locator
);
450 err
= new SAXParseException (description
, null, null, -1, -1);
451 if (errHandler
!= null)
452 errHandler
.fatalError (err
);
453 // we always treat this as fatal, regardless of the handler
458 private static boolean isExtender (char c
)
460 // [88] Extender ::= ...
461 return c
== 0x00b7 || c
== 0x02d0 || c
== 0x02d1 || c
== 0x0387
462 || c
== 0x0640 || c
== 0x0e46 || c
== 0x0ec6 || c
== 0x3005
463 || (c
>= 0x3031 && c
<= 0x3035)
464 || (c
>= 0x309d && c
<= 0x309e)
465 || (c
>= 0x30fc && c
<= 0x30fe);
469 // use augmented Unicode rules, not full XML rules
470 private boolean isName (String name
, String context
, String id
)
473 char buf
[] = name
.toCharArray ();
476 if (!Character
.isUnicodeIdentifierStart (buf
[0])
477 && ":_".indexOf (buf
[0]) == -1)
480 int max
= buf
.length
;
481 for (int i
= 1; pass
&& i
< max
; i
++) {
483 if (!Character
.isUnicodeIdentifierPart (c
)
484 && ":-_.".indexOf (c
) == -1
491 error ("In " + context
+ " for " + id
492 + ", '" + name
+ "' is not a name");
493 return pass
; // true == OK
496 // use augmented Unicode rules, not full XML rules
497 private boolean isNmtoken (String nmtoken
, String context
, String id
)
500 char buf
[] = nmtoken
.toCharArray ();
502 int max
= buf
.length
;
504 // XXX make this share code with isName
506 for (int i
= 0; pass
&& i
< max
; i
++) {
508 if (!Character
.isUnicodeIdentifierPart (c
)
509 && ":-_.".indexOf (c
) == -1
515 error ("In " + context
+ " for " + id
516 + ", '" + nmtoken
+ "' is not a name token");
517 return pass
; // true == OK
520 private void checkEnumeration (String value
, String type
, String name
)
523 if (!hasMatch (value
, type
))
525 error ("Value '" + value
526 + "' for attribute '" + name
527 + "' is not permitted: " + type
);
530 // used to test enumerated attributes and mixed content models
532 static boolean hasMatch (String value
, String orList
)
534 int len
= value
.length ();
535 int max
= orList
.length () - len
;
538 (start
= orList
.indexOf (value
, start
)) != -1;
544 c
= orList
.charAt (start
- 1);
545 if (c
!= '|' && c
!= '('/*)*/)
547 c
= orList
.charAt (start
+ len
);
548 if (c
!= '|' && /*(*/ c
!= ')')
556 * <b>LexicalHandler</b> Records the declaration of the root
557 * element, so it can be verified later.
558 * Passed to the next consumer, unless this one was
559 * preloaded with a particular DTD.
561 public void startDTD (String name
, String publicId
, String systemId
)
564 if (disableDeclarations
)
568 super.startDTD (name
, publicId
, systemId
);
572 * <b>LexicalHandler</b> Verifies that all referenced notations
573 * and unparsed entities have been declared.
574 * Passed to the next consumer, unless this one was
575 * preloaded with a particular DTD.
577 public void endDTD ()
580 if (disableDeclarations
)
583 // this is a convenient hook for end-of-dtd checks, but we
584 // could also trigger it in the first startElement call.
585 // locator info is more appropriate here though.
587 // VC: Notation Declared (NDATA can refer to them before decls,
588 // as can NOTATION attribute enumerations and defaults)
589 int length
= nDeferred
.size ();
590 for (int i
= 0; i
< length
; i
++) {
591 String notation
= (String
) nDeferred
.elementAt (i
);
592 if (!notations
.contains (notation
)) {
593 error ("A declaration referred to notation '" + notation
594 + "' which was never declared");
597 nDeferred
.removeAllElements ();
599 // VC: Entity Name (attribute values can refer to them
600 // before they're declared); VC Attribute Default Legal
601 length
= uDeferred
.size ();
602 for (int i
= 0; i
< length
; i
++) {
603 String entity
= (String
) uDeferred
.elementAt (i
);
604 if (!unparsed
.contains (entity
)) {
605 error ("An attribute default referred to entity '" + entity
606 + "' which was never declared");
609 uDeferred
.removeAllElements ();
614 // These are interned, so we can rely on "==" to find the type of
615 // all attributes except enumerations ...
616 // "(this|or|that|...)" and "NOTATION (this|or|that|...)"
617 static final String types
[] = {
619 "ID", "IDREF", "IDREFS",
620 "NMTOKEN", "NMTOKENS",
626 * <b>DeclHandler</b> Records attribute declaration for later use
627 * in validating document content, and checks validity constraints
628 * that are applicable to attribute declarations.
629 * Passed to the next consumer, unless this one was
630 * preloaded with a particular DTD.
632 public void attributeDecl (
638 ) throws SAXException
// Records one attribute declaration for element eName (creating the
// ElementInfo entry on demand) and applies the declaration-time validity
// checks named in the "VC:" comments below, then forwards the event.
// NOTE(review): parts of this method (parameter list, braces, several
// assignments) are missing from this copy of the file; the notes below
// describe only the statements that are visible.
640 if (disableDeclarations
)
// presumably an early return: a consumer preloaded with a DTD ignores
// declarations found in the document -- TODO confirm against full source
643 ElementInfo info
= (ElementInfo
) elements
.get (eName
);
644 AttributeInfo ainfo
= new AttributeInfo ();
645 boolean checkOne
= false;
646 boolean interned
= false;
648 // cheap interning of type names and #FIXED, #REQUIRED
649 // for faster startElement (we can use "==")
650 for (int i
= 0; i
< types
.length
; i
++) {
651 if (types
[i
].equals (type
)) {
657 if ("#FIXED".equals (mode
))
659 else if ("#REQUIRED".equals (mode
))
666 // we might not have seen the content model yet
668 info
= new ElementInfo (eName
);
669 elements
.put (eName
, info
);
// "#REQUIRED" may be compared with "==" only if mode was canonicalized
// above; "#IMPLIED" is compared with equals() in any case
673 if (!("#REQUIRED" == mode
|| "#IMPLIED".equals (mode
))) {
674 // VC: ID Attribute Default
675 error ("ID attribute '" + aName
676 + "' must be #IMPLIED or #REQUIRED");
679 } else if (!interned
&& type
.startsWith ("NOTATION ")) {
682 // VC: Notation Attributes (notations must be declared)
// the type text looks like "NOTATION (a|b|...)"; offset 10 skips the
// ten characters of "NOTATION (" and the end index drops the final ')'
683 StringTokenizer tokens
= new StringTokenizer (
684 type
.substring (10, type
.lastIndexOf (')')),
686 while (tokens
.hasMoreTokens ()) {
687 String token
= tokens
.nextToken ();
// notations may be declared later in the DTD, so unknown names are
// queued here and checked in endDTD
688 if (!notations
.contains (token
))
689 nDeferred
.addElement (token
);
// compare the new attribute against each attribute already recorded
// for this element type
693 for (Enumeration e
= info
.attributes
.keys ();
694 e
.hasMoreElements ();
697 AttributeInfo ainfo2
;
699 name
= (String
) e
.nextElement ();
700 ainfo2
= (AttributeInfo
) info
.attributes
.get (name
);
// NOTE(review): "|| !interned" does not look at ainfo2.type at all, so
// when the new attribute is a NOTATION type (assuming "interned" is only
// set for entries of "types") every previously declared attribute, of
// any type, is reported as a conflict -- confirm intended behavior.
701 if (type
== ainfo2
.type
|| !interned
/* NOTATION */) {
702 // VC: One ID per Element Type
703 // VC: One Notation per Element Type
// NOTE(review): this ternary looks inverted -- the NOTATION case is the
// one where "interned" is false, yet "NOTATION" is printed when it is true
704 error ("Element '" + eName
705 + "' already has an attribute of type "
706 + (interned ?
"NOTATION" : type
)
709 + "' is a validity error");
714 // VC: Attribute Default Legal
717 if ("CDATA" == type
) {
718 // event source rejected '<'
720 } else if ("NMTOKEN" == type
) {
721 // VC: Name Token (is a nmtoken)
722 isNmtoken (value
, "attribute default", aName
);
724 } else if ("NMTOKENS" == type
) {
725 // VC: Name Token (is a nmtoken; at least one value)
726 StringTokenizer tokens
= new StringTokenizer (value
);
727 if (!tokens
.hasMoreTokens ())
728 error ("Default for attribute '" + aName
729 + "' must have at least one name token.");
731 String token
= tokens
.nextToken ();
732 isNmtoken (token
, "attribute default", aName
);
733 } while (tokens
.hasMoreTokens ());
735 } else if ("IDREF" == type
|| "ENTITY" == type
) {
736 // VC: Entity Name (is a name)
737 // VC: IDREF (is a name) (is declared)
738 isName (value
, "attribute default", aName
);
// unparsed entities may be declared later; defer the lookup to endDTD
739 if ("ENTITY" == type
&& !unparsed
.contains (value
))
740 uDeferred
.addElement (value
);
742 } else if ("IDREFS" == type
|| "ENTITIES" == type
) {
743 // VC: Entity Name (is a name; at least one value)
744 // VC: IDREF (is a name; at least one value)
745 StringTokenizer names
= new StringTokenizer (value
);
746 if (!names
.hasMoreTokens ())
747 error ("Default for attribute '" + aName
748 + "' must have at least one name.");
750 String name
= names
.nextToken ();
751 isName (name
, "attribute default", aName
);
752 if ("ENTITIES" == type
&& !unparsed
.contains (name
))
// NOTE(review): likely bug -- the test above is on the single token
// "name", but the whole whitespace-separated default "value" is queued.
// endDTD looks each queued string up in "unparsed", so a multi-name
// default would be reported as undeclared even once every entity in it
// has been declared.  Probably should queue "name".
753 uDeferred
.addElement (value
);
754 } while (names
.hasMoreTokens ());
756 } else if (type
.charAt (0) == '(' /*)*/ ) {
757 // VC: Enumeration (must match)
758 checkEnumeration (value
, type
, aName
);
760 } else if (!interned
&& checkOne
) { /* NOTATION */
761 // VC: Notation attributes (must be names)
762 isName (value
, "attribute default", aName
);
764 // VC: Notation attributes (must be declared)
765 if (!notations
.contains (value
))
766 nDeferred
.addElement (value
);
768 // VC: Enumeration (must match)
769 checkEnumeration (value
, type
, aName
);
// ID is the only type not handled by an earlier branch of this chain;
// anything else reaching this point is reported as an illegal type
771 } else if ("ID" != type
)
772 throw new RuntimeException ("illegal attribute type: " + type
);
// only the first declaration of a given attribute name is recorded
775 if (info
.attributes
.get (aName
) == null)
776 info
.attributes
.put (aName
, ainfo
);
779 warning ("Element '" + eName
780 + "' already has an attribute named '" + aName + "'");
// xml:space is only accepted when declared as an enumeration made up of
// "default" and/or "preserve"
783 if ("xml:space".equals (aName
)) {
784 if (!("(default|preserve)".equals (type
)
785 || "(preserve|default)".equals (type
)
786 // these next two are arguable; XHTML's DTD doesn't
787 // deserve errors. After all, it's not like any
788 // illegal _value_ could pass ...
789 || "(preserve)".equals (type
)
790 || "(default)".equals (type
)
793 "xml:space attribute type must be like '(default|preserve)'"
794 + " not '" + type
+ "'"
// hand the declaration on to the next consumer in the pipeline
798 super.attributeDecl (eName
, aName
, type
, mode
, value
);
802 * <b>DeclHandler</b> Records the element declaration for later use
803 * when checking document content, and checks validity constraints that
804 * apply to element declarations. Passed to the next consumer, unless
805 * this one was preloaded with a particular DTD.
807 public void elementDecl (String name
, String model
)
810 if (disableDeclarations
)
813 ElementInfo info
= (ElementInfo
) elements
.get (name
);
815 // we might have seen an attribute decl already
817 info
= new ElementInfo (name
);
818 elements
.put (name
, info
);
820 if (info
.model
!= null) {
821 // NOTE: not all parsers can report such duplicates.
822 // VC: Unique Element Type Declaration
823 error ("Element type '" + name
824 + "' was already declared.");
828 // VC: No Duplicate Types (in mixed content models)
829 if (model
.charAt (1) == '#') // (#PCDATA...
830 info
.getRecognizer (this);
832 super.elementDecl (name
, model
);
836 * <b>DeclHandler</b> passed to the next consumer, unless this
837 * one was preloaded with a particular DTD
839 public void internalEntityDecl (String name
, String value
)
842 if (!disableDeclarations
)
843 super.internalEntityDecl (name
, value
);
847 * <b>DeclHandler</b> passed to the next consumer, unless this
848 * one was preloaded with a particular DTD
850 public void externalEntityDecl (String name
,
851 String publicId
, String systemId
)
854 if (!disableDeclarations
)
855 super.externalEntityDecl (name
, publicId
, systemId
);
860 * <b>DTDHandler</b> Records the notation name, for checking
861 * NOTATION attribute values and declarations of unparsed
862 * entities. Passed to the next consumer, unless this one was
863 * preloaded with a particular DTD.
865 public void notationDecl (String name
, String publicId
, String systemId
)
868 if (disableDeclarations
)
871 notations
.addElement (name
);
872 super.notationDecl (name
, publicId
, systemId
);
876 * <b>DTDHandler</b> Records the entity name, for checking
877 * ENTITY and ENTITIES attribute values; records the notation
878 * name if it hasn't yet been declared. Passed to the next consumer,
879 * unless this one was preloaded with a particular DTD.
881 public void unparsedEntityDecl (
886 ) throws SAXException
888 if (disableDeclarations
)
891 unparsed
.addElement (name
);
892 if (!notations
.contains (notationName
))
893 nDeferred
.addElement (notationName
);
894 super.unparsedEntityDecl (name
, publicId
, systemId
, notationName
);
899 * <b>ContentHandler</b> Ensures that state from any previous parse
901 * Passed to the next consumer.
903 public void startDocument ()
907 super.startDocument ();
911 private static boolean isAsciiLetter (char c
)
913 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z');
918 * <b>ContentHandler</b> Reports a fatal exception. Validating
919 * XML processors may not skip any entities.
921 public void skippedEntity (String name
)
924 fatalError ("may not skip entities");
928 * SAX2 doesn't expand non-PE refs in attribute defaults...
930 private String
expandDefaultRefs (String s
)
933 if (s
.indexOf ('&') < 0)
936 // FIXME: handle &#nn; &#xnn; &name;
937 String message
= "Can't expand refs in attribute default: " + s
;
944 * <b>ContentHandler</b> Performs validity checks against element
945 * (and document) content models, and attribute values.
946 * Passed to the next consumer.
948 public void startElement (
953 ) throws SAXException
956 // First check content model for the enclosing scope.
958 if (contentStack
.isEmpty ()) {
959 // VC: Root Element Type
960 if (!qName
.equals (rootName
)) {
961 if (rootName
== null)
962 warning ("This document has no DTD, can't be valid");
964 error ("Root element type '" + qName
965 + "' was declared to be '" + rootName
+ "'");
968 Recognizer state
= (Recognizer
) contentStack
.peek ();
971 Recognizer newstate
= state
.acceptElement (qName
);
973 if (newstate
== null)
974 error ("Element type '" + qName
975 + "' in element '" + state
.type
.name
976 + "' violates content model " + state
.type
.model
978 if (newstate
!= state
) {
980 contentStack
.push (newstate
);
986 // Then check that this element was declared, and push the
987 // object used to validate its content model onto our stack.
989 // This is where the recognizer gets created, if needed; if
990 // it's a "children" (elements) content model, an NDFA is
991 // created. (One recognizer is used per content type, no
992 // matter how complex that recognizer is.)
996 info
= (ElementInfo
) elements
.get (qName
);
997 if (info
== null || info
.model
== null) {
998 // VC: Element Valid (base clause)
999 error ("Element type '" + qName
+ "' was not declared");
1000 contentStack
.push (null);
1002 // for less diagnostic noise, fake a declaration.
1003 elementDecl (qName
, "ANY");
1005 contentStack
.push (info
.getRecognizer (this));
1008 // Then check each attribute present
1012 AttributeInfo ainfo
;
1015 len
= atts
.getLength ();
1019 for (int i
= 0; i
< len
; i
++) {
1020 aname
= atts
.getQName (i
);
1023 || (ainfo
= (AttributeInfo
) info
.attributes
.get (aname
))
1025 // VC: Attribute Value Type
1026 error ("Attribute '" + aname
1027 + "' was not declared for element type " + qName
);
1031 String value
= atts
.getValue (i
);
1033 // note that "==" for type names and "#FIXED" is correct
1034 // (and fast) since we've interned those literals.
1036 if ("#FIXED" == ainfo
.mode
) {
1037 String expanded
= expandDefaultRefs (ainfo
.value
);
1039 // VC: Fixed Attribute Default
1040 if (!value
.equals (expanded
)) {
1041 error ("Attribute '" + aname
1042 + "' must match " + expanded
1048 if ("CDATA" == ainfo
.type
)
1052 // For all other attribute types, there are various
1056 if ("ID" == ainfo
.type
) {
1057 // VC: ID (must be a name)
1058 if (isName (value
, "ID attribute", aname
)) {
1059 if (Boolean
.TRUE
== ids
.get (value
))
1060 // VC: ID (appears once)
1061 error ("ID attribute " + aname
1062 + " uses an ID value '" + value
1063 + "' which was already declared.");
1065 // any forward refs are no longer problems
1066 ids
.put (value
, Boolean
.TRUE
);
1071 if ("IDREF" == ainfo
.type
) {
1072 // VC: IDREF (value must be a name)
1073 if (isName (value
, "IDREF attribute", aname
)) {
1074 // VC: IDREF (must match some ID attribute)
1075 if (ids
.get (value
) == null)
1076 // new -- assume it's a forward ref
1077 ids
.put (value
, Boolean
.FALSE
);
1082 if ("IDREFS" == ainfo
.type
) {
1083 StringTokenizer tokens
= new StringTokenizer (value
, " ");
1085 if (!tokens
.hasMoreTokens ()) {
1086 // VC: IDREF (one or more values)
1087 error ("IDREFS attribute " + aname
1088 + " must have at least one ID ref");
1090 String id
= tokens
.nextToken ();
1092 // VC: IDREF (value must be a name)
1093 if (isName (id
, "IDREFS attribute", aname
)) {
1094 // VC: IDREF (must match some ID attribute)
1095 if (ids
.get (id
) == null)
1096 // new -- assume it's a forward ref
1097 ids
.put (id
, Boolean
.FALSE
);
1099 } while (tokens
.hasMoreTokens ());
1103 if ("NMTOKEN" == ainfo
.type
) {
1104 // VC: Name Token (is a name token)
1105 isNmtoken (value
, "NMTOKEN attribute", aname
);
1109 if ("NMTOKENS" == ainfo
.type
) {
1110 StringTokenizer tokens
= new StringTokenizer (value
, " ");
1112 if (!tokens
.hasMoreTokens ()) {
1113 // VC: Name Token (one or more values)
1114 error ("NMTOKENS attribute " + aname
1115 + " must have at least one name token");
1117 String token
= tokens
.nextToken ();
1119 // VC: Name Token (is a name token)
1120 isNmtoken (token
, "NMTOKENS attribute", aname
);
1121 } while (tokens
.hasMoreTokens ());
1125 if ("ENTITY" == ainfo
.type
) {
1126 if (!unparsed
.contains (value
))
1128 error ("Value of attribute '" + aname
1129 + "' refers to unparsed entity '" + value
1130 + "' which was not declared.");
1134 if ("ENTITIES" == ainfo
.type
) {
1135 StringTokenizer tokens
= new StringTokenizer (value
, " ");
1137 if (!tokens
.hasMoreTokens ()) {
1138 // VC: Entity Name (one or more values)
1139 error ("ENTITIES attribute " + aname
1140 + " must have at least one name token");
1142 String entity
= tokens
.nextToken ();
1144 if (!unparsed
.contains (entity
))
1146 error ("Value of attribute '" + aname
1147 + "' refers to unparsed entity '" + entity
1148 + "' which was not declared.");
1149 } while (tokens
.hasMoreTokens ());
1154 // check for enumerations last; more expensive
1156 if (ainfo
.type
.charAt (0) == '(' /*)*/
1157 || ainfo
.type
.startsWith ("NOTATION ")
1159 // VC: Enumeration (value must be defined)
1160 checkEnumeration (value
, ainfo
.type
, aname
);
1166 // Last, check that all #REQUIRED attributes were provided
1169 Hashtable table
= info
.attributes
;
1171 if (table
.size () != 0) {
1172 Enumeration e
= table
.keys ();
1174 // XXX table.keys uses the heap, bleech -- slows things
1176 while (e
.hasMoreElements ()) {
1177 aname
= (String
) e
.nextElement ();
1178 ainfo
= (AttributeInfo
) table
.get (aname
);
1180 // "#REQUIRED" mode was interned in attributeDecl
1181 if ("#REQUIRED" == ainfo
.mode
1182 && atts
.getValue (aname
) == null) {
1183 // VC: Required Attribute
1184 error ("Attribute '" + aname
+ "' must be specified "
1185 + "for element type " + qName
);
1190 super.startElement (uri
, localName
, qName
, atts
);
1194 * <b>ContentHandler</b> Reports a validity error if the element's content
1195 * model does not permit character data.
1196 * Passed to the next consumer.
1198 public void characters (char ch
[], int start
, int length
)
1203 if (contentStack
.empty ())
1206 state
= (Recognizer
) contentStack
.peek ();
1208 // NOTE: if this ever supports with SAX parsers that don't
1209 // report ignorable whitespace as such (only XP?), this class
1210 // needs to morph it into ignorableWhitespace() as needed ...
1212 if (state
!= null && !state
.acceptCharacters ())
1213 // VC: Element Valid (clauses three, four -- see recognizer)
1214 error ("Character content not allowed in element "
1217 super.characters (ch
, start
, length
);
1222 * <b>ContentHandler</b> Reports a validity error if the element's content
1223 * model does not permit end-of-element yet, or a well formedness error
1224 * if there was no matching startElement call.
1225 * Passed to the next consumer.
// Pops the recognizer that belongs to the element being closed and reports
// a validity error when that recognizer says its content model is not yet
// completed.  An empty stack (EmptyStackException) is reported through
// fatalError() instead.  The event is then handed to the superclass.
//
// NOTE(review): this extract is missing several short lines of the method
// (throws clause, opening of the try block, one operand of the "Premature
// end" message and the first half of the conditional that guards the
// " { 'uri', localName }" suffix).  Confirm them against the complete file.
1227 public void endElement (String uri
, String localName
, String qName
)
// a null entry on the stack is tolerated and simply skips the check
1231 Recognizer state
= (Recognizer
) contentStack
.pop ();
1233 if (state
!= null && !state
.completed ())
1234 // VC: Element valid (clauses two, three, four; see Recognizer)
1235 error ("Premature end for element '"
1237 + "', content model "
1238 + state
.type
.model
);
1240 // could insist on match of start element, but that's
1241 // something the input stream must guarantee.
1243 } catch (EmptyStackException e
) {
// nothing was open:  there was no matching startElement call
1244 fatalError ("endElement without startElement: " + qName
1247 : ( " { '" + uri
+ "', " + localName
+ " }")));
1249 super.endElement (uri
, localName
, qName
);
1253 * <b>ContentHandler</b> Checks whether all ID values that were
1254 * referenced have been declared, and releases all resources.
1255 * Passed to the next consumer.
1257 * @see #setDocumentLocator
// Walks every key of the 'ids' table and reports a validity error for each
// one still mapped to Boolean.FALSE, then forwards the event to the
// superclass.  (startElement stores Boolean.TRUE for a declared ID and
// Boolean.FALSE for a value that has only been referenced so far.)
//
// NOTE(review): the Javadoc also promises that resources are released here,
// but no such statement is visible in this extract (short lines of the
// method are missing) -- confirm against the complete file.
1259 public void endDocument ()
1262 for (Enumeration idNames
= ids
.keys ();
1263 idNames
.hasMoreElements ();
1265 String id
= (String
) idNames
.nextElement ();
// identity comparison:  only the Boolean.TRUE/FALSE constants are stored
1267 if (Boolean
.FALSE
== ids
.get (id
)) {
1268 // VC: IDREF (must match ID)
1269 error ("Undeclared ID value '" + id
1270 + "' was referred to by an IDREF/IDREFS attribute");
1275 super.endDocument ();
1279 /** Holds per-element declarations */
1280 static private final class ElementInfo
1285 // key = attribute name; value = AttributeInfo
1286 Hashtable attributes
= new Hashtable (11);
1288 ElementInfo (String n
) { name
= n
; }
1290 private Recognizer recognizer
;
1292 // for validating content models: one per type, shared,
1293 // and constructed only on demand ... so unused elements do
1294 // not need to consume resources.
1295 Recognizer
getRecognizer (ValidationConsumer consumer
)
1298 if (recognizer
== null) {
1299 if ("ANY".equals (model
))
1301 else if ("EMPTY".equals (model
))
1302 recognizer
= new EmptyRecognizer (this);
1303 else if ('#' == model
.charAt (1))
1304 // n.b. this constructor does a validity check
1305 recognizer
= new MixedRecognizer (this, consumer
);
1307 recognizer
= new ChildrenRecognizer (this, consumer
);
1313 /** Holds per-attribute declarations */
1314 static private final class AttributeInfo
1317 String mode
; // #REQUIRED, etc (or null)
1318 String value
; // or null
1323 // Content model validation
1326 static private final Recognizer ANY
= new Recognizer (null);
1329 // Base class defines the calls used to validate content,
1330 // and supports the "ANY" content model
1331 static private class Recognizer
1333 final ElementInfo type
;
1335 Recognizer (ElementInfo t
) { type
= t
; }
1337 // return true iff character data is legal here
1338 boolean acceptCharacters ()
1340 // VC: Element Valid (third and fourth clauses)
1343 // null return = failure
1344 // otherwise, next state (like an FSM)
1345 // prerequisite: tested that name was declared
1346 Recognizer
acceptElement (String name
)
1348 // VC: Element Valid (fourth clause)
1351 // return true iff model is completed, can finish
1352 boolean completed ()
1354 // VC: Element Valid (fourth clause)
1357 public String
toString ()
1358 // n.b. "children" is the interesting case!
1359 { return (type
== null) ?
"ANY" : type
.model
; }
1362 // "EMPTY" content model -- no characters or elements
1363 private static final class EmptyRecognizer
extends Recognizer
1365 public EmptyRecognizer (ElementInfo type
)
1368 // VC: Element Valid (first clause)
1369 boolean acceptCharacters ()
1372 // VC: Element Valid (first clause)
1373 Recognizer
acceptElement (String name
)
1377 // "Mixed" content model -- ANY, but restricts elements
1378 private static final class MixedRecognizer
extends Recognizer
1380 private String permitted
[];
1382 // N.B. constructor tests for duplicated element names (VC)
1383 public MixedRecognizer (ElementInfo t
, ValidationConsumer v
)
1388 // (#PCDATA...)* or (#PCDATA) ==> ... or empty
1389 // with the "..." being "|elname|..."
1390 StringTokenizer tokens
= new StringTokenizer (
1391 t
.model
.substring (8, t
.model
.lastIndexOf (')')),
1393 Vector vec
= new Vector ();
1395 while (tokens
.hasMoreTokens ()) {
1396 String token
= tokens
.nextToken ();
1398 if (vec
.contains (token
))
1399 v
.error ("element " + token
1400 + " is repeated in mixed content model: "
1403 vec
.addElement (token
.intern ());
1405 permitted
= new String
[vec
.size ()];
1406 for (int i
= 0; i
< permitted
.length
; i
++)
1407 permitted
[i
] = (String
) vec
.elementAt (i
);
1409 // in one large machine-derived DTD sample, most of about
1410 // 250 mixed content models were empty, and 25 had ten or
1411 // more entries. 2 had over a hundred elements. Linear
1412 // search isn't obviously wrong.
1415 // VC: Element Valid (third clause)
1416 Recognizer
acceptElement (String name
)
1418 int length
= permitted
.length
;
1420 // first pass -- optimistic w.r.t. event source interning
1421 // (and document validity)
1422 for (int i
= 0; i
< length
; i
++)
1423 if (permitted
[i
] == name
)
1425 // second pass -- pessimistic w.r.t. event source interning
1426 for (int i
= 0; i
< length
; i
++)
1427 if (permitted
[i
].equals (name
))
1434 // recognizer loop flags, see later
1435 private static final int F_LOOPHEAD
= 0x01;
1436 private static final int F_LOOPNEXT
= 0x02;
1438 // for debugging -- used to label/count nodes in toString()
1439 private static int nodeCount
;
1442 * "Children" content model -- these are nodes in NDFA state graphs.
1443 * They work in fixed space. Note that these graphs commonly have
1444 * cycles, handling features such as zero-or-more and one-or-more.
1446 * <p>It's readonly, so only one copy is ever needed. The content model
1447 * stack may have any number of pointers into each graph, when a model
1448 * happens to be needed more than once due to element nesting. Since
1449 * traversing the graph just moves to another node, and never changes
1450 * it, traversals never interfere with each other.
1452 * <p>There is an option to report non-deterministic models. These are
1453 * always XML errors, but ones which are not often reported despite the
1454 * fact that they can lead to different validating parsers giving
1455 * different results for the same input. (The XML spec doesn't require
1456 * them to be reported.)
1458 * <p><b>FIXME</b> There's currently at least one known bug here, in that
1459 * it's not actually detecting the non-determinism it tries to detect.
1460 * (Of the "optional.xml" test, the once-or-twice-2* tests are all non-D;
1461 * maybe some others.) This may relate to the issue flagged below as
1462 * "should not" happen (but it was), which showed up when patching the
1463 * graph to have one exit node (or more EMPTY nodes).
1465 private static final class ChildrenRecognizer
extends Recognizer
1466 implements Cloneable
1468 // for reporting non-deterministic content models
1469 // ... a waste of space if we're not reporting those!
1470 // ... along with the 'model' member (in base class)
1471 private ValidationConsumer consumer
;
1473 // for CHOICE nodes -- each component is an arc that
1474 // accepts a different NAME (or is EMPTY indicating
1475 // NDFA termination).
1476 private Recognizer components
[];
1478 // for NAME/SEQUENCE nodes -- accepts that NAME and
1479 // then goes to the next node (CHOICE, NAME, EMPTY).
1480 private String name
;
1481 private Recognizer next
;
1483 // loops always point back to a CHOICE node. we mark such choice
1484 // nodes (F_LOOPHEAD) for diagnostics and faster deep cloning.
1485 // We also mark nodes before back pointers (F_LOOPNEXT), to ensure
1486 // termination when we patch sequences and loops.
1490 // prevent a needless indirection between 'this' and 'node'
1491 private void copyIn (ChildrenRecognizer node
)
1493 // model & consumer are already set
1494 components
= node
.components
;
// used to construct top level "children" content models,
// parsing the whole model string and terminating the resulting
// graph with an EMPTY recognizer.
//
// NOTE(review): the delegation to the private constructor is not visible
// in the damaged extract and is inferred -- confirm.
public ChildrenRecognizer (ElementInfo type, ValidationConsumer vc)
{
    this (vc, type);
    populate (type.model.toCharArray (), 0);
    patchNext (new EmptyRecognizer (type), null);
}

// used internally; populating is separate
//
// NOTE(review): the body is not visible in the damaged extract; it is
// inferred from copyIn()'s remark that model and consumer are already set.
private ChildrenRecognizer (ValidationConsumer vc, ElementInfo type)
{
    super (type);
    consumer = vc;
}
1517 // When rewriting some graph nodes we need deep clones in one case;
1518 // mostly shallow clones (what the JVM handles for us) are fine.
1520 private ChildrenRecognizer
shallowClone ()
1523 return (ChildrenRecognizer
) clone ();
1524 } catch (CloneNotSupportedException e
) {
1525 throw new Error ("clone");
1529 private ChildrenRecognizer
deepClone ()
1531 return deepClone (new Hashtable (37));
// Recursive worker for deepClone().  Starts from a shallow clone of this
// node, then replaces the clone's 'next' and each entry of 'components'
// with deep clones whenever they are ChildrenRecognizer nodes.  Anything
// that is neither a ChildrenRecognizer nor an EmptyRecognizer is rejected
// with a RuntimeException.  Nodes flagged F_LOOPHEAD are registered in
// 'table' (original -> clone).
//
// NOTE(review): short lines are missing from this extract, among them
// what happens when a loop head is already present in 'table', the null
// checks on 'next' and on each component, the recursive ".deepClone (table)"
// call tails and the final return -- confirm against the complete file.
1534 private ChildrenRecognizer
deepClone (Hashtable table
)
1536 ChildrenRecognizer retval
;
// loop heads are looked up first, and recorded once cloned
1538 if ((flags
& F_LOOPHEAD
) != 0) {
1539 retval
= (ChildrenRecognizer
) table
.get (this);
1543 retval
= shallowClone ();
1544 table
.put (this, retval
);
1546 retval
= shallowClone ();
// successor of a NAME/SEQUENCE node
1549 if (next
instanceof ChildrenRecognizer
)
1550 retval
.next
= ((ChildrenRecognizer
)next
)
1552 else if (!(next
instanceof EmptyRecognizer
))
1553 throw new RuntimeException ("deepClone");
// arcs of a CHOICE node:  the clone gets its own array
1556 if (components
!= null) {
1557 retval
.components
= new Recognizer
[components
.length
];
1558 for (int i
= 0; i
< components
.length
; i
++) {
1559 Recognizer temp
= components
[i
];
1562 retval
.components
[i
] = null;
1563 else if (temp
instanceof ChildrenRecognizer
)
1564 retval
.components
[i
] = ((ChildrenRecognizer
)temp
)
1566 else if (!(temp
instanceof EmptyRecognizer
))
1567 throw new RuntimeException ("deepClone");
1574 // connect subgraphs, first to next (sequencing)
// Propagates 'theNext' through the graph below this node:  the call is
// forwarded to 'next' and to every component that is a ChildrenRecognizer,
// and component slots that are still null are set to 'theNext'.  Nodes
// flagged F_LOOPNEXT are left alone.  'table' remembers loop heads already
// visited (callers in this file pass null; a fresh table is then created).
//
// NOTE(review): short lines are missing from this extract -- the early
// returns, the test that selects between the 'next' branch and the
// 'components' loop, and the assignment used when 'next' is still unset.
// Confirm against the complete file.
1575 private void patchNext (Recognizer theNext
, Hashtable table
)
1577 // backpointers must not be repatched or followed
1578 if ((flags
& F_LOOPNEXT
) != 0)
1581 // XXX this table "shouldn't" be needed, right?
1582 // but some choice nodes looped if it isn't there.
1583 if (table
!= null && table
.get (this) != null)
1586 table
= new Hashtable ();
// a successor that is itself a graph node gets patched recursively
1592 else if (next
instanceof ChildrenRecognizer
) {
1593 ((ChildrenRecognizer
)next
).patchNext (theNext
, table
);
1594 } else if (!(next
instanceof EmptyRecognizer
))
1595 throw new RuntimeException ("patchNext");
// choice arcs:  fill the empty slots, recurse into the others
1600 for (int i
= 0; i
< components
.length
; i
++) {
1601 if (components
[i
] == null)
1602 components
[i
] = theNext
;
1603 else if (components
[i
] instanceof ChildrenRecognizer
) {
1604 ((ChildrenRecognizer
)components
[i
])
1605 .patchNext (theNext
, table
);
1606 } else if (!(components
[i
] instanceof EmptyRecognizer
))
1607 throw new RuntimeException ("patchNext");
// remember patched loop heads so they are not visited again
1610 if (table
!= null && (flags
& F_LOOPHEAD
) != 0)
1611 table
.put (this, this);
1615 * Parses a 'children' spec (or recursively 'cp') and makes this
1616 * become a regular graph node.
1618 * @return index after this particle
// Parses the content particle that begins at parseBuf [startPos] and turns
// this node into the corresponding graph node.
//
//  - A particle that does not begin with '(' is a NAME; its characters
//    are stored in 'name'.
//  - Otherwise the members of the group are parsed recursively into new
//    nodes.  A ',' separator chains them with patchNext(); a '|' separator
//    collects them into 'components'.
//  - A trailing '?', '*' or '+' is then rewritten in terms of CHOICE
//    nodes, shallow or deep clones and (for '*' and '+') loops.
//
// Throws IndexOutOfBoundsException when startPos + 1 lies outside the
// buffer, and RuntimeException ("corrupt content model") for a malformed
// group.
//
// NOTE(review): many short lines are missing from this extract (the
// declarations of 'c', 'separator' and 'v', most of the NAME scanning
// switch, the merge of 'first' into this node, the field resets after the
// arity rewrites and the final return).  The comments below describe only
// what is visible -- confirm the rest against the complete file.
1620 private int populate (char parseBuf
[], int startPos
)
1622 int nextPos
= startPos
+ 1;
1625 if (nextPos
< 0 || nextPos
>= parseBuf
.length
)
1626 throw new IndexOutOfBoundsException ();
1628 // Grammar of the string is from the XML spec, but
1629 // with whitespace removed by the SAX parser.
1631 // children ::= (choice | seq) ('?' | '*' | '+')?
1632 // cp ::= (Name | choice | seq) ('?' | '*' | '+')?
1633 // choice ::= '(' cp ('|' choice)* ')'
1634 // seq ::= '(' cp (',' choice)* ')'
1636 // interior nodes only
1638 if (parseBuf
[startPos
] != '('/*)*/) {
1639 boolean done
= false;
1641 switch (c
= parseBuf
[nextPos
]) {
1642 case '?': case '*': case '+':
// the name is the text between startPos and nextPos
1652 name
= new String (parseBuf
, startPos
, nextPos
- startPos
);
1654 // interior OR toplevel nodes
1658 // collect everything as a separate list, and merge it
1659 // into "this" later if we can (SEQUENCE or singleton)
1660 ChildrenRecognizer first
;
1662 first
= new ChildrenRecognizer (consumer
, type
);
1663 nextPos
= first
.populate (parseBuf
, nextPos
);
1664 c
= parseBuf
[nextPos
++];
1666 if (c
== ',' || c
== '|') {
1667 ChildrenRecognizer current
= first
;
1671 if (separator
== '|') {
1673 v
.addElement (first
);
// one new node per further member of the group
1677 ChildrenRecognizer link
;
1679 link
= new ChildrenRecognizer (consumer
, type
);
1680 nextPos
= link
.populate (parseBuf
, nextPos
);
1682 if (separator
== ',') {
1683 current
.patchNext (link
, null);
1686 v
.addElement (link
);
1688 c
= parseBuf
[nextPos
++];
1689 } while (c
== separator
);
1691 // choice ... collect everything into one array.
1692 if (separator
== '|') {
1693 // assert v.size() > 1
1694 components
= new Recognizer
[v
.size ()];
1695 for (int i
= 0; i
< components
.length
; i
++) {
1696 components
[i
] = (Recognizer
)
1699 // assert flags == 0
1701 // sequence ... merge into "this" to be smaller.
1705 // treat singletons like one-node sequences.
1710 throw new RuntimeException ("corrupt content model");
1714 // Arity is optional, and the root of all fun. We keep the
1715 // FSM state graph simple by only having NAME/SEQUENCE and
1716 // CHOICE nodes (or EMPTY to terminate a model), easily
1717 // evaluated. So we rewrite each node that has arity, using
1718 // those primitives. We create loops here, if needed.
1720 if (nextPos
< parseBuf
.length
) {
1721 c
= parseBuf
[nextPos
];
1722 if (c
== '?' || c
== '*' || c
== '+') {
1725 // Rewrite 'zero-or-one' "?" arity to a CHOICE:
1726 // - SEQUENCE (clone, what's next)
1727 // - or, what's next
1728 // Size cost: N --> N + 1
1730 Recognizer once
= shallowClone ();
1732 components
= new Recognizer
[2];
1733 components
[0] = once
;
1734 // components [1] initted to null
1740 // Rewrite 'zero-or-more' "*" arity to a CHOICE.
1741 // - LOOP (clone, back to this CHOICE)
1742 // - or, what's next
1743 // Size cost: N --> N + 1
1744 } else if (c
== '*') {
1745 ChildrenRecognizer loop
= shallowClone ();
// the clone's successor is this node, closing the loop
1747 loop
.patchNext (this, null);
1748 loop
.flags
|= F_LOOPNEXT
;
1751 components
= new Recognizer
[2];
1752 components
[0] = loop
;
1753 // components [1] initted to null
1758 // Rewrite 'one-or-more' "+" arity to a SEQUENCE.
1759 // Basically (a)+ --> ((a),(a)*).
1762 // * LOOP (clone, back to the CHOICE)
1763 // * or, whatever's next
1764 // Size cost: N --> 2N + 1
1765 } else if (c
== '+') {
1766 ChildrenRecognizer loop
= deepClone ();
1767 ChildrenRecognizer choice
;
1769 choice
= new ChildrenRecognizer (consumer
, type
);
1770 loop
.patchNext (choice
, null);
1771 loop
.flags
|= F_LOOPNEXT
;
1772 choice
.flags
= F_LOOPHEAD
;
1774 choice
.components
= new Recognizer
[2];
1775 choice
.components
[0] = loop
;
1776 // choice.components [1] initted to null
1777 // choice.name, choice.next initted to null
// this node itself is followed by the new CHOICE
1779 patchNext (choice
, null);
1787 // VC: Element Valid (second clause)
1788 boolean acceptCharacters ()
1791 // VC: Element Valid (second clause)
// Tries to consume a child element of the given type from this graph node.
// A node carrying a 'name' compares that name with the type; a CHOICE node
// asks each component in turn.  When 'warnNonDeterministic' is set and two
// components yield different results, the content model is reported to
// the consumer as non-deterministic.
//
// NOTE(review): the return statements and several branch bodies are
// missing from this extract, so which value is returned in each case is
// not visible here -- confirm against the complete file.
1792 Recognizer
acceptElement (String type
)
1797 if (name
.equals (type
))
1802 // CHOICE ... optionally reporting nondeterminism we
1803 // run across. we won't check out every transition
1804 // for nondeterminism; only the ones we follow.
1805 Recognizer retval
= null;
1807 for (int i
= 0; i
< components
.length
; i
++) {
1808 Recognizer temp
= components
[i
].acceptElement (type
);
1812 else if (!warnNonDeterministic
)
1814 else if (retval
== null)
1816 else if (retval
!= temp
)
1817 consumer
.error ("Content model " + this.type
.model
1818 + " is non-deterministic for " + type
);
1823 // VC: Element Valid (second clause)
// Reports whether the content model may end at this node.  For a CHOICE
// node every component is asked in turn.
//
// NOTE(review): the test that distinguishes the two cases and all return
// statements are missing from this extract -- confirm against the
// complete file.
1824 boolean completed ()
1827 // expecting a specific element
1831 // choice, some sequences
1832 for (int i
= 0; i
< components
.length
; i
++) {
1833 if (components
[i
].completed ())
1841 // FOR DEBUGGING ... flattens the graph for printing.
// Builds the text by handing a fresh buffer and a fresh label table to the
// recursive overload.  The shared ANY recognizer serves as the lock object.
//
// NOTE(review): at least one short line inside the synchronized block is
// missing from this extract, and where that block ends is not visible --
// confirm against the complete file.
1843 public String toString ()
1845 StringBuffer buf = new StringBuffer ();
1847 // only one set of loop labels can be generated
1849 synchronized (ANY) {
1852 toString (buf, new Hashtable ());
1853 return buf.toString ();
1857 private void toString (StringBuffer buf, Hashtable table)
1859 // When we visit a node, label and count it.
1860 // Nodes are never visited/counted more than once.
1861 // For small models labels waste space, but if arity
1862 // mappings were used the savings are substantial.
1863 // (Plus, the output can be more readily understood.)
1864 String temp = (String) table.get (this);
1872 StringBuffer scratch = new StringBuffer (15);
1874 if ((flags & F_LOOPHEAD) != 0)
1875 scratch.append ("loop");
1877 scratch.append ("node");
1878 scratch.append ('-');
1879 scratch.append (++nodeCount);
1880 temp = scratch.toString ();
1882 table.put (this, temp);
1891 // n.b. some output encodings turn some name chars into '?'
1892 // e.g. with Japanese names and ASCII output
1894 if (components != null) // bug!
1898 else if (next instanceof EmptyRecognizer) // patch-to-next
1900 else if (next instanceof ChildrenRecognizer) {
1902 ((ChildrenRecognizer)next).toString (buf, table);
1910 for (int i = 0; i < components.length; i++) {
1913 if (components [i] instanceof EmptyRecognizer) {
1915 } else if (components [i] == null) { // patch-to-next
1918 ChildrenRecognizer r;
1920 r = (ChildrenRecognizer) components [i];
1921 r.toString (buf, table);