1 /* StreamSerializer.java --
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu
.xml
.transform
;
40 import java
.io
.ByteArrayOutputStream
;
41 import java
.io
.IOException
;
42 import java
.io
.OutputStream
;
43 import java
.io
.UnsupportedEncodingException
;
44 import java
.nio
.ByteBuffer
;
45 import java
.nio
.CharBuffer
;
46 import java
.nio
.charset
.Charset
;
47 import java
.nio
.charset
.CharsetEncoder
;
48 import java
.util
.Collection
;
49 import java
.util
.Collections
;
50 import java
.util
.HashMap
;
51 import java
.util
.HashSet
;
53 import javax
.xml
.XMLConstants
;
54 import org
.w3c
.dom
.Attr
;
55 import org
.w3c
.dom
.Document
;
56 import org
.w3c
.dom
.DocumentType
;
57 import org
.w3c
.dom
.NamedNodeMap
;
58 import org
.w3c
.dom
.Node
;
61 * Serializes a DOM node to an output stream.
63 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
65 public class StreamSerializer
68 static final int SPACE
= 0x20;
69 static final int BANG
= 0x21; // !
70 static final int APOS
= 0x27; // '
71 static final int SLASH
= 0x2f; // /
72 static final int BRA
= 0x3c; // <
73 static final int KET
= 0x3e; // >
74 static final int EQ
= 0x3d; // =
77 * HTML 4.01 boolean attributes
// Lookup table read by isHTMLBoolean: the key is a lower-case HTML element
// name and the value is the collection of attribute names that are treated
// as boolean attributes on that element.
79 static final Map HTML_BOOLEAN_ATTRIBUTES
= new HashMap();
// NOTE(review): the statements that create and fill each `set` are not
// visible in this view; only the registration calls are. Confirm the
// attribute names against the complete file.
86 HTML_BOOLEAN_ATTRIBUTES
.put("area", set
);
90 HTML_BOOLEAN_ATTRIBUTES
.put("img", set
);
94 HTML_BOOLEAN_ATTRIBUTES
.put("object", set
);
98 HTML_BOOLEAN_ATTRIBUTES
.put("hr", set
);
// The list-type elements below are registered on consecutive statements
// with the same `set` variable.
102 HTML_BOOLEAN_ATTRIBUTES
.put("dl", set
);
103 HTML_BOOLEAN_ATTRIBUTES
.put("ol", set
);
104 HTML_BOOLEAN_ATTRIBUTES
.put("ul", set
);
105 HTML_BOOLEAN_ATTRIBUTES
.put("dir", set
);
106 HTML_BOOLEAN_ATTRIBUTES
.put("menu", set
);
113 HTML_BOOLEAN_ATTRIBUTES
.put("input", set
);
118 HTML_BOOLEAN_ATTRIBUTES
.put("select", set
);
122 HTML_BOOLEAN_ATTRIBUTES
.put("optgroup", set
);
127 HTML_BOOLEAN_ATTRIBUTES
.put("option", set
);
132 HTML_BOOLEAN_ATTRIBUTES
.put("textarea", set
);
136 HTML_BOOLEAN_ATTRIBUTES
.put("button", set
);
// Table cells: th and td are registered back to back with the same `set`.
140 HTML_BOOLEAN_ATTRIBUTES
.put("th", set
);
141 HTML_BOOLEAN_ATTRIBUTES
.put("td", set
);
145 HTML_BOOLEAN_ATTRIBUTES
.put("frame", set
);
149 HTML_BOOLEAN_ATTRIBUTES
.put("script", set
);
// Name of the output character encoding; the constructor stores the
// interned string and uses it to look up the Charset.
152 protected final String encoding
;
// Charset obtained from Charset.forName(encoding) in the constructor.
153 final Charset charset
;
// Encoder created from `charset`; encodeText uses it both to test whether
// text is encodable and to produce the output bytes.
154 final CharsetEncoder encoder
;
// Namespace bindings declared so far: key is the namespace URI, value is
// the prefix recorded by define().
156 final Map namespaces
;
// End-of-line sequence; the constructor falls back to the "line.separator"
// system property when no value is supplied.
157 protected String eol
;
// Element names checked in doSerialize to decide whether an element's
// children are serialized with CDATA conversion. Empty by default.
158 Collection cdataSectionElements
= Collections
.EMPTY_SET
;
// Tested together with !attr.getSpecified() while serializing attributes.
// NOTE(review): the action taken when the test matches is not visible in
// this view; presumably such attributes are skipped - confirm.
160 protected boolean discardDefaultContent
;
// Defaults to true.
// NOTE(review): the read site is not visible in this view; presumably it
// gates output of the XML declaration - confirm.
161 protected boolean xmlDeclaration
= true;
/**
 * Creates a serializer in {@code Stylesheet.OUTPUT_XML} mode.
 * No encoding and no end-of-line sequence are supplied; both are passed to
 * the three-argument constructor as {@code null}, which substitutes the
 * platform line separator for the missing end-of-line sequence.
 */
public StreamSerializer()
{
  this(Stylesheet.OUTPUT_XML, null, null);
}
/**
 * Creates a serializer in {@code Stylesheet.OUTPUT_XML} mode that writes
 * in the given encoding. The end-of-line sequence is passed on as
 * {@code null}, so the three-argument constructor uses the platform line
 * separator.
 *
 * @param encoding name of the output character encoding, forwarded
 *                 unchanged to the three-argument constructor
 */
public StreamSerializer(String encoding)
{
  this(Stylesheet.OUTPUT_XML, encoding, null);
}
// Main constructor: records the encoding, resolves its Charset and encoder,
// chooses the end-of-line sequence and creates the empty namespace map.
// Charset.forName throws an unchecked exception if the name is illegal or
// unsupported.
173 public StreamSerializer(int mode
, String encoding
, String eol
)
// NOTE(review): the body of this null check is not visible in this view;
// presumably a default encoding name is substituted - confirm. The
// assignment of the `mode` argument is not visible here either.
176 if (encoding
== null)
// The interned name is stored, then used to resolve the charset.
180 this.encoding
= encoding
.intern();
181 charset
= Charset
.forName(this.encoding
);
182 encoder
= charset
.newEncoder();
// A null eol selects the platform line separator.
183 this.eol
= (eol
!= null) ? eol
: System
.getProperty("line.separator");
184 namespaces
= new HashMap();
187 void setCdataSectionElements(Collection c
)
189 cdataSectionElements
= c
;
192 public void serialize(final Node node
, final OutputStream out
)
195 serialize(node
, out
, false);
// Package-private overload that passes `node` to doSerialize along with the
// CDATA-conversion flag.
// NOTE(review): the control flow around these two statements is not visible
// in this view; `next` is read before doSerialize runs, presumably so that
// iteration can continue with the following sibling afterwards - confirm.
198 void serialize(Node node
, final OutputStream out
,
199 boolean convertToCdata
)
// The sibling is captured before the node itself is written.
204 Node next
= node
.getNextSibling();
205 doSerialize(node
, out
, convertToCdata
);
// Writes the serialized form of a single node to `out`, branching on the
// node type. Visible branches cover attributes, elements, character data,
// CDATA sections, comments, documents and document fragments, document
// types, entity references and processing instructions.
// NOTE(review): this view omits many lines of the method (the switch
// scaffolding, braces and some statements); the comments below describe
// only the statements that are visible.
210 private void doSerialize(final Node node
, final OutputStream out
,
211 boolean convertToCdata
)
// NOTE(review): the guarding condition is not visible; presumably this is
// thrown when `out` is null - confirm.
216 throw new NullPointerException("no output stream");
218 String value
, prefix
;
220 String uri
= node
.getNamespaceURI();
221 boolean defined
= false;
222 short nt
= node
.getNodeType();
// With CDATA conversion requested, a text node is handled as if it were a
// CDATA section.
223 if (convertToCdata
&& nt
== Node
.TEXT_NODE
)
225 nt
= Node
.CDATA_SECTION_NODE
;
// Attribute. An attribute counts as a namespace declaration if it is in
// the xmlns namespace, has the prefix "xmlns", or has a prefix starting
// with "xmlns:"; for those, its value is checked against isDefined().
229 case Node
.ATTRIBUTE_NODE
:
230 prefix
= node
.getPrefix();
231 if (XMLConstants
.XMLNS_ATTRIBUTE_NS_URI
.equals(uri
) ||
232 XMLConstants
.XMLNS_ATTRIBUTE
.equals(prefix
) ||
233 (prefix
!= null && prefix
.startsWith("xmlns:")))
235 String nsuri
= node
.getNodeValue();
236 if (isDefined(nsuri
))
240 String name
= node
.getLocalName();
243 name
= node
.getNodeName();
// An ordinary attribute in a namespace not yet declared: register the
// namespace and write an xmlns declaration ahead of the attribute.
247 else if (uri
!= null && !isDefined(uri
))
249 prefix
= define(uri
, prefix
);
250 String nsname
= (prefix
== null) ?
"xmlns" : "xmlns:" + prefix
;
252 out
.write(encodeText(nsname
));
254 String nsvalue
= "'" + encode(uri
, true, true) + "'";
// NOTE(review): this write uses String.getBytes(encoding), while the
// matching write in the element branch goes through encodeText();
// presumably both should use encodeText() - confirm.
255 out
.write(nsvalue
.getBytes(encoding
));
// Attribute name, then the value in single quotes escaped by encode().
// In HTML mode, an attribute whose value equals its name and which
// isHTMLBoolean accepts is tested for first (action not visible here).
259 String a_nodeName
= node
.getNodeName();
260 out
.write(encodeText(a_nodeName
));
261 String a_nodeValue
= node
.getNodeValue();
262 if (mode
== Stylesheet
.OUTPUT_HTML
&&
263 a_nodeName
.equals(a_nodeValue
) &&
264 isHTMLBoolean((Attr
) node
, a_nodeName
))
269 value
= "'" + encode(a_nodeValue
, true, true) + "'";
270 out
.write(encodeText(value
));
// Element: tag name, a namespace declaration if the element's namespace is
// not yet declared, then attributes, then children.
272 case Node
.ELEMENT_NODE
:
273 value
= node
.getNodeName();
275 out
.write(encodeText(value
));
276 if (uri
!= null && !isDefined(uri
))
278 prefix
= define(uri
, node
.getPrefix());
279 String nsname
= (prefix
== null) ?
"xmlns" : "xmlns:" + prefix
;
281 out
.write(encodeText(nsname
));
283 String nsvalue
= "'" + encode(uri
, true, true) + "'";
284 out
.write(encodeText(nsvalue
));
// Each attribute is written through serialize() with CDATA conversion off.
287 NamedNodeMap attrs
= node
.getAttributes();
290 int len
= attrs
.getLength();
291 for (int i
= 0; i
< len
; i
++)
293 Attr attr
= (Attr
) attrs
.item(i
);
// NOTE(review): the action for a non-specified (defaulted) attribute is not
// visible; presumably it is skipped - confirm.
294 if (discardDefaultContent
&& !attr
.getSpecified())
300 serialize(attr
, out
, false);
// Children are written with CDATA conversion when `value` is listed in
// cdataSectionElements.
304 convertToCdata
= cdataSectionElements
.contains(value
);
305 children
= node
.getFirstChild();
306 if (children
== null)
314 serialize(children
, out
, convertToCdata
);
317 out
.write(encodeText(value
));
// Character data: escaped with encode() unless the node carries the user
// data "disable-output-escaping" = "yes".
// NOTE(review): the case label for this branch is not visible; presumably
// Node.TEXT_NODE - confirm.
322 value
= node
.getNodeValue();
323 if (!"yes".equals(node
.getUserData("disable-output-escaping")))
325 value
= encode(value
, false, false);
327 out
.write(encodeText(value
));
// CDATA section: the node value is wrapped verbatim in CDATA delimiters.
329 case Node
.CDATA_SECTION_NODE
:
330 value
= "<![CDATA[" + node
.getNodeValue() + "]]>";
331 out
.write(encodeText(value
));
// Comment: followed by the end-of-line sequence when its parent is the
// document node.
333 case Node
.COMMENT_NODE
:
334 value
= "<!--" + node
.getNodeValue() + "-->";
335 out
.write(encodeText(value
));
336 Node cp
= node
.getParentNode();
337 if (cp
!= null && cp
.getNodeType() == Node
.DOCUMENT_NODE
)
339 out
.write(encodeText(eol
));
// Document or document fragment. XML mode deals with the XML declaration;
// HTML mode makes sure a Content-Type meta element exists. The children
// are serialized afterwards.
342 case Node
.DOCUMENT_NODE
:
343 case Node
.DOCUMENT_FRAGMENT_NODE
:
344 if (mode
== Stylesheet
.OUTPUT_XML
)
346 if ("UTF-16".equalsIgnoreCase(encoding
))
// The declaration is considered unless the node's user data
// "omit-xml-declaration" is "yes" (the rest of the condition is not
// visible here).
351 if (!"yes".equals(node
.getUserData("omit-xml-declaration")) &&
354 Document doc
= (node
instanceof Document
) ?
355 (Document
) node
: null;
356 String version
= (doc
!= null) ? doc
.getXmlVersion() : null;
359 version
= (String
) node
.getUserData("version");
// The declaration text itself is written as US-ASCII bytes.
367 out
.write("xml version='".getBytes("US-ASCII"));
368 out
.write(version
.getBytes("US-ASCII"));
// The encoding pseudo-attribute is written only for encodings other than
// UTF-8.
370 if (!("UTF-8".equalsIgnoreCase(encoding
)))
372 out
.write(" encoding='".getBytes("US-ASCII"));
373 out
.write(encoding
.getBytes("US-ASCII"));
376 if ((doc
!= null && doc
.getXmlStandalone()) ||
377 "yes".equals(node
.getUserData("standalone")))
379 out
.write(" standalone='yes'".getBytes("US-ASCII"));
383 out
.write(encodeText(eol
));
385 // TODO warn if not outputting the declaration would be a
388 else if (mode
== Stylesheet
.OUTPUT_HTML
)
390 // Ensure that encoding is accessible
// The media type comes from the node's "media-type" user data and defaults
// to "text/html". The charset name is wrapped in double quotes when it
// contains a space.
391 String mediaType
= (String
) node
.getUserData("media-type");
392 if (mediaType
== null)
394 mediaType
= "text/html";
396 String contentType
= mediaType
+ "; charset=" +
397 ((encoding
.indexOf(' ') != -1) ?
398 "\"" + encoding
+ "\"" :
400 Document doc
= (node
instanceof Document
) ?
(Document
) node
:
401 node
.getOwnerDocument();
// Scan the element children of the node; if no suitable element is found,
// an "html" element is created and appended.
// NOTE(review): this inserts nodes into the DOM being serialized, so the
// caller's tree is modified as a side effect.
403 for (Node ctx
= node
.getFirstChild(); ctx
!= null;
404 ctx
= ctx
.getNextSibling())
406 if (ctx
.getNodeType() == Node
.ELEMENT_NODE
)
414 html
= doc
.createElement("html");
415 node
.appendChild(html
);
// Look for a "head" child (name compared case-insensitively); otherwise one
// is created and inserted before a child element of html, or appended.
418 for (Node ctx
= html
.getFirstChild(); ctx
!= null;
419 ctx
= ctx
.getNextSibling())
421 if (ctx
.getNodeType() == Node
.ELEMENT_NODE
)
423 String name
= ctx
.getLocalName();
426 name
= ctx
.getNodeName();
428 if ("head".equalsIgnoreCase(name
))
437 head
= doc
.createElement("head");
439 for (Node ctx
= html
.getFirstChild(); ctx
!= null;
440 ctx
= ctx
.getNextSibling())
442 if (ctx
.getNodeType() == Node
.ELEMENT_NODE
)
450 html
.insertBefore(head
, c1
);
454 html
.appendChild(head
);
// Search head for a meta element whose http-equiv attribute is
// "Content-Type" and remember its content attribute.
458 Node metaContent
= null;
459 for (Node ctx
= head
.getFirstChild(); ctx
!= null;
460 ctx
= ctx
.getNextSibling())
462 if (ctx
.getNodeType() == Node
.ELEMENT_NODE
)
464 String name
= ctx
.getLocalName();
467 name
= ctx
.getNodeName();
469 if ("meta".equalsIgnoreCase(name
))
471 NamedNodeMap metaAttrs
= ctx
.getAttributes();
472 int len
= metaAttrs
.getLength();
473 String httpEquiv
= null;
475 for (int i
= 0; i
< len
; i
++)
477 Node attr
= metaAttrs
.item(i
);
478 String attrName
= attr
.getNodeName();
479 if ("http-equiv".equalsIgnoreCase(attrName
))
481 httpEquiv
= attr
.getNodeValue();
483 else if ("content".equalsIgnoreCase(attrName
))
488 if ("Content-Type".equalsIgnoreCase(httpEquiv
))
491 metaContent
= content
;
// A new meta element is created and placed in head: appended, or inserted
// before the existing first child.
499 meta
= doc
.createElement("meta");
501 Node first
= head
.getFirstChild();
504 head
.appendChild(meta
);
508 head
.insertBefore(meta
, first
);
510 Node metaHttpEquiv
= doc
.createAttribute("http-equiv");
511 meta
.getAttributes().setNamedItem(metaHttpEquiv
);
512 metaHttpEquiv
.setNodeValue("Content-Type");
// The content attribute (created if none was found) receives the computed
// content type.
514 if (metaContent
== null)
516 metaContent
= doc
.createAttribute("content");
517 meta
.getAttributes().setNamedItem(metaContent
);
519 metaContent
.setNodeValue(contentType
);
// Finally the children of the document or fragment are serialized.
522 children
= node
.getFirstChild();
523 if (children
!= null)
525 serialize(children
, out
, convertToCdata
);
// Document type declaration: name, then the public and system identifiers
// and the internal subset where present.
528 case Node
.DOCUMENT_TYPE_NODE
:
529 DocumentType doctype
= (DocumentType
) node
;
532 out
.write(encodeText("DOCTYPE "));
533 value
= doctype
.getNodeName();
534 out
.write(encodeText(value
));
535 String publicId
= doctype
.getPublicId();
536 if (publicId
!= null)
538 out
.write(encodeText(" PUBLIC "));
540 out
.write(encodeText(publicId
));
543 String systemId
= doctype
.getSystemId();
544 if (systemId
!= null)
546 out
.write(encodeText(" SYSTEM "));
548 out
.write(encodeText(systemId
));
551 String internalSubset
= doctype
.getInternalSubset();
552 if (internalSubset
!= null)
554 out
.write(encodeText(internalSubset
));
// NOTE(review): the end-of-line sequence is written with
// String.getBytes(encoding) here but with encodeText() in the other
// branches; presumably this should match - confirm.
557 out
.write(eol
.getBytes(encoding
));
// Entity reference: the node value wrapped in '&' and ';'.
559 case Node
.ENTITY_REFERENCE_NODE
:
560 value
= "&" + node
.getNodeValue() + ";";
561 out
.write(encodeText(value
));
// Processing instruction: target, a space, then the data; followed by the
// end-of-line sequence when its parent is the document node.
563 case Node
.PROCESSING_INSTRUCTION_NODE
:
564 value
= "<?" + node
.getNodeName() + " " + node
.getNodeValue() + "?>";
565 out
.write(encodeText(value
));
566 Node pp
= node
.getParentNode();
567 if (pp
!= null && pp
.getNodeType() == Node
.DOCUMENT_NODE
)
569 out
.write(encodeText(eol
));
579 boolean isDefined(String uri
)
581 return XMLConstants
.XML_NS_URI
.equals(uri
) ||
582 XMLConstants
.XMLNS_ATTRIBUTE_NS_URI
.equals(uri
) ||
583 namespaces
.containsKey(uri
);
586 String
define(String uri
, String prefix
)
588 while (namespaces
.containsValue(prefix
))
590 // Fabricate new prefix
591 prefix
= prefix
+ "_";
593 namespaces
.put(uri
, prefix
);
597 void undefine(String uri
)
599 namespaces
.remove(uri
);
// Converts text to bytes with this serializer's CharsetEncoder and returns
// an array holding exactly the encoded bytes. When the encoder reports that
// it cannot encode the whole string, the text is first rebuilt character by
// character, with replacements for the characters that cannot be encoded.
// NOTE(review): several lines of this method are not visible in this view,
// including the statements that append to `buf` and the return statements.
602 final byte[] encodeText(String text
)
606 if (!encoder
.canEncode(text
))
608 // Check each character
609 StringBuffer buf
= new StringBuffer();
610 int len
= text
.length();
611 for (int i
= 0; i
< len
; i
++)
613 char c
= text
.charAt(i
);
// NOTE(review): the test is applied to one UTF-16 code unit at a time, and
// CharsetEncoder.canEncode(char) returns false for a lone surrogate;
// presumably a supplementary character in text that needs this fallback is
// replaced as two separate surrogate references - confirm.
614 if (encoder
.canEncode(c
))
620 // Replace with character entity reference
621 String hex
= Integer
.toHexString((int) c
);
627 text
= buf
.toString();
// Encode the (possibly rewritten) text; `len` is the number of bytes
// between the buffer's position and its limit.
629 ByteBuffer encoded
= encoder
.encode(CharBuffer
.wrap(text
));
630 int len
= encoded
.limit() - encoded
.position();
// If the buffer has a backing array, use it; when that array is longer than
// the encoded length, its first `len` bytes are copied into a new array.
// NOTE(review): the copy starts at index 0 and does not consult
// arrayOffset() or position(); presumably both are zero for buffers
// returned by encode() - confirm.
631 if (encoded
.hasArray())
633 byte[] ret
= encoded
.array();
634 if (ret
.length
> len
)
637 byte[] ret2
= new byte[len
];
638 System
.arraycopy(ret
, 0, ret2
, 0, len
);
// Otherwise the bytes are read out of the buffer into a new array.
644 byte[] ret
= new byte[len
];
645 encoded
.get(ret
, 0, len
);
// Escapes characters in `text` for output and returns the result.
// `buf` stays null until the first character that needs replacing; it is
// then created from the unchanged prefix text.substring(0, i). If no
// character needs replacing, the original string is returned as-is.
// `inAttr` enables the apostrophe and double-quote branches.
// NOTE(review): many lines of this method are not visible in this view
// (the conditions of the first branches, the appended replacement text and
// every use of `encodeCtl`), so those are not described here.
649 String
encode(String text
, boolean encodeCtl
, boolean inAttr
)
651 int len
= text
.length();
652 StringBuffer buf
= null;
653 for (int i
= 0; i
< len
; i
++)
655 char c
= text
.charAt(i
);
660 buf
= new StringBuffer(text
.substring(0, i
));
668 buf
= new StringBuffer(text
.substring(0, i
));
// In HTML mode the character that follows the current one is checked for
// '{' (the enclosing condition and the action are not visible here).
674 if (mode
== Stylesheet
.OUTPUT_HTML
&& (i
+ 1) < len
&&
675 text
.charAt(i
+ 1) == '{')
686 buf
= new StringBuffer(text
.substring(0, i
));
// Apostrophe inside an attribute value.
691 else if (c
== '\'' && inAttr
)
695 buf
= new StringBuffer(text
.substring(0, i
));
// NOTE(review): as shown, the HTML and non-HTML branches append the same
// literal, and the double-quote branch further down holds a malformed
// literal; presumably the original literals were entity references that
// were decoded when this text was extracted - confirm against the
// pristine file.
697 if (mode
== Stylesheet
.OUTPUT_HTML
)
698 // HTML does not define &apos;, use character entity ref
699 buf
.append("'");
701 buf
.append("'");
703 else if (c
== '"' && inAttr
)
707 buf
= new StringBuffer(text
.substring(0, i
));
709 buf
.append(""");
717 buf
= new StringBuffer(text
.substring(0, i
));
724 else if (buf
!= null)
729 else if (buf
!= null)
734 return (buf
== null) ? text
: buf
.toString();
737 String
toString(Node node
)
739 ByteArrayOutputStream out
= new ByteArrayOutputStream();
742 serialize(node
, out
);
743 return new String(out
.toByteArray(), encoding
);
745 catch (IOException e
)
747 throw new RuntimeException(e
.getMessage());
751 boolean isHTMLBoolean(Attr attr
, String attrName
)
753 attrName
= attrName
.toLowerCase();
754 Node element
= attr
.getOwnerElement();
755 String elementName
= element
.getLocalName();
756 if (elementName
== null)
758 elementName
= element
.getNodeName();
760 elementName
= elementName
.toLowerCase();
761 Collection attributes
=
762 (Collection
) HTML_BOOLEAN_ATTRIBUTES
.get(elementName
);
763 return (attributes
!= null && attributes
.contains(attrName
));