Remove old autovect-branch by moving to "dead" directory.
[official-gcc.git] / old-autovect-branch / libjava / classpath / gnu / xml / transform / StreamSerializer.java
blob74b10057c122a8e9f83e430d130236f6fd25438c
1 /* StreamSerializer.java --
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package gnu.xml.transform;
40 import java.io.ByteArrayOutputStream;
41 import java.io.IOException;
42 import java.io.OutputStream;
43 import java.io.UnsupportedEncodingException;
44 import java.nio.ByteBuffer;
45 import java.nio.CharBuffer;
46 import java.nio.charset.Charset;
47 import java.nio.charset.CharsetEncoder;
48 import java.util.Collection;
49 import java.util.Collections;
50 import java.util.HashMap;
51 import java.util.HashSet;
52 import java.util.Map;
53 import javax.xml.XMLConstants;
54 import org.w3c.dom.Attr;
55 import org.w3c.dom.Document;
56 import org.w3c.dom.DocumentType;
57 import org.w3c.dom.NamedNodeMap;
58 import org.w3c.dom.Node;
/**
 * Serializes a DOM node to an output stream.
 *
 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 */
65 public class StreamSerializer
68 static final int SPACE = 0x20;
69 static final int BANG = 0x21; // !
70 static final int APOS = 0x27; // '
71 static final int SLASH = 0x2f; // /
72 static final int BRA = 0x3c; // <
73 static final int KET = 0x3e; // >
74 static final int EQ = 0x3d; // =
76 /**
77 * HTML 4.01 boolean attributes
79 static final Map HTML_BOOLEAN_ATTRIBUTES = new HashMap();
80 static
82 HashSet set;
84 set = new HashSet();
85 set.add("nohref");
86 HTML_BOOLEAN_ATTRIBUTES.put("area", set);
88 set = new HashSet();
89 set.add("ismap");
90 HTML_BOOLEAN_ATTRIBUTES.put("img", set);
92 set = new HashSet();
93 set.add("declare");
94 HTML_BOOLEAN_ATTRIBUTES.put("object", set);
96 set = new HashSet();
97 set.add("noshade");
98 HTML_BOOLEAN_ATTRIBUTES.put("hr", set);
100 set = new HashSet();
101 set.add("compact");
102 HTML_BOOLEAN_ATTRIBUTES.put("dl", set);
103 HTML_BOOLEAN_ATTRIBUTES.put("ol", set);
104 HTML_BOOLEAN_ATTRIBUTES.put("ul", set);
105 HTML_BOOLEAN_ATTRIBUTES.put("dir", set);
106 HTML_BOOLEAN_ATTRIBUTES.put("menu", set);
108 set = new HashSet();
109 set.add("checked");
110 set.add("disabled");
111 set.add("readonly");
112 set.add("ismap");
113 HTML_BOOLEAN_ATTRIBUTES.put("input", set);
115 set = new HashSet();
116 set.add("multiple");
117 set.add("disabled");
118 HTML_BOOLEAN_ATTRIBUTES.put("select", set);
120 set = new HashSet();
121 set.add("disabled");
122 HTML_BOOLEAN_ATTRIBUTES.put("optgroup", set);
124 set = new HashSet();
125 set.add("selected");
126 set.add("disabled");
127 HTML_BOOLEAN_ATTRIBUTES.put("option", set);
129 set = new HashSet();
130 set.add("disabled");
131 set.add("readonly");
132 HTML_BOOLEAN_ATTRIBUTES.put("textarea", set);
134 set = new HashSet();
135 set.add("disabled");
136 HTML_BOOLEAN_ATTRIBUTES.put("button", set);
138 set = new HashSet();
139 set.add("nowrap");
140 HTML_BOOLEAN_ATTRIBUTES.put("th", set);
141 HTML_BOOLEAN_ATTRIBUTES.put("td", set);
143 set = new HashSet();
144 set.add("noresize");
145 HTML_BOOLEAN_ATTRIBUTES.put("frame", set);
147 set = new HashSet();
148 set.add("defer");
149 HTML_BOOLEAN_ATTRIBUTES.put("script", set);
152 protected final String encoding;
153 final Charset charset;
154 final CharsetEncoder encoder;
155 final int mode;
156 final Map namespaces;
157 protected String eol;
158 Collection cdataSectionElements = Collections.EMPTY_SET;
160 protected boolean discardDefaultContent;
161 protected boolean xmlDeclaration = true;
/**
 * Creates a serializer for the XML output method using the default
 * encoding (UTF-8) and the platform line separator.
 */
public StreamSerializer()
{
  this(Stylesheet.OUTPUT_XML, null, null);
}
/**
 * Creates a serializer for the XML output method using the platform line
 * separator.
 *
 * @param encoding the name of the output encoding, or null for UTF-8
 */
public StreamSerializer(String encoding)
{
  this(Stylesheet.OUTPUT_XML, encoding, null);
}
/**
 * Creates a serializer.
 *
 * @param mode the output method (one of the Stylesheet.OUTPUT_* values)
 * @param encoding the name of the output encoding, or null for UTF-8
 * @param eol the line terminator, or null for the value of the
 * <code>line.separator</code> system property
 */
public StreamSerializer(int mode, String encoding, String eol)
{
  this.mode = mode;
  this.encoding = ((encoding != null) ? encoding : "UTF-8").intern();
  this.charset = Charset.forName(this.encoding);
  this.encoder = this.charset.newEncoder();
  if (eol == null)
    {
      eol = System.getProperty("line.separator");
    }
  this.eol = eol;
  this.namespaces = new HashMap();
}
187 void setCdataSectionElements(Collection c)
189 cdataSectionElements = c;
192 public void serialize(final Node node, final OutputStream out)
193 throws IOException
195 serialize(node, out, false);
198 void serialize(Node node, final OutputStream out,
199 boolean convertToCdata)
200 throws IOException
202 while (node != null)
204 Node next = node.getNextSibling();
205 doSerialize(node, out, convertToCdata);
206 node = next;
210 private void doSerialize(final Node node, final OutputStream out,
211 boolean convertToCdata)
212 throws IOException
214 if (out == null)
216 throw new NullPointerException("no output stream");
218 String value, prefix;
219 Node children;
220 String uri = node.getNamespaceURI();
221 boolean defined = false;
222 short nt = node.getNodeType();
223 if (convertToCdata && nt == Node.TEXT_NODE)
225 nt = Node.CDATA_SECTION_NODE;
227 switch (nt)
229 case Node.ATTRIBUTE_NODE:
230 prefix = node.getPrefix();
231 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
232 XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) ||
233 (prefix != null && prefix.startsWith("xmlns:")))
235 String nsuri = node.getNodeValue();
236 if (isDefined(nsuri))
238 break;
240 String name = node.getLocalName();
241 if (name == null)
243 name = node.getNodeName();
245 define(nsuri, name);
247 else if (uri != null && !isDefined(uri))
249 prefix = define(uri, prefix);
250 String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
251 out.write(SPACE);
252 out.write(encodeText(nsname));
253 out.write(EQ);
254 String nsvalue = "'" + encode(uri, true, true) + "'";
255 out.write(nsvalue.getBytes(encoding));
256 defined = true;
258 out.write(SPACE);
259 String a_nodeName = node.getNodeName();
260 out.write(encodeText(a_nodeName));
261 String a_nodeValue = node.getNodeValue();
262 if (mode == Stylesheet.OUTPUT_HTML &&
263 a_nodeName.equals(a_nodeValue) &&
264 isHTMLBoolean((Attr) node, a_nodeName))
266 break;
268 out.write(EQ);
269 value = "'" + encode(a_nodeValue, true, true) + "'";
270 out.write(encodeText(value));
271 break;
272 case Node.ELEMENT_NODE:
273 value = node.getNodeName();
274 out.write(BRA);
275 out.write(encodeText(value));
276 if (uri != null && !isDefined(uri))
278 prefix = define(uri, node.getPrefix());
279 String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
280 out.write(SPACE);
281 out.write(encodeText(nsname));
282 out.write(EQ);
283 String nsvalue = "'" + encode(uri, true, true) + "'";
284 out.write(encodeText(nsvalue));
285 defined = true;
287 NamedNodeMap attrs = node.getAttributes();
288 if (attrs != null)
290 int len = attrs.getLength();
291 for (int i = 0; i < len; i++)
293 Attr attr = (Attr) attrs.item(i);
294 if (discardDefaultContent && !attr.getSpecified())
296 // NOOP
298 else
300 serialize(attr, out, false);
304 convertToCdata = cdataSectionElements.contains(value);
305 children = node.getFirstChild();
306 if (children == null)
308 out.write(SLASH);
309 out.write(KET);
311 else
313 out.write(KET);
314 serialize(children, out, convertToCdata);
315 out.write(BRA);
316 out.write(SLASH);
317 out.write(encodeText(value));
318 out.write(KET);
320 break;
321 case Node.TEXT_NODE:
322 value = node.getNodeValue();
323 if (!"yes".equals(node.getUserData("disable-output-escaping")))
325 value = encode(value, false, false);
327 out.write(encodeText(value));
328 break;
329 case Node.CDATA_SECTION_NODE:
330 value = "<![CDATA[" + node.getNodeValue() + "]]>";
331 out.write(encodeText(value));
332 break;
333 case Node.COMMENT_NODE:
334 value = "<!--" + node.getNodeValue() + "-->";
335 out.write(encodeText(value));
336 Node cp = node.getParentNode();
337 if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE)
339 out.write(encodeText(eol));
341 break;
342 case Node.DOCUMENT_NODE:
343 case Node.DOCUMENT_FRAGMENT_NODE:
344 if (mode == Stylesheet.OUTPUT_XML)
346 if ("UTF-16".equalsIgnoreCase(encoding))
348 out.write(0xfe);
349 out.write(0xff);
351 if (!"yes".equals(node.getUserData("omit-xml-declaration")) &&
352 xmlDeclaration)
354 Document doc = (node instanceof Document) ?
355 (Document) node : null;
356 String version = (doc != null) ? doc.getXmlVersion() : null;
357 if (version == null)
359 version = (String) node.getUserData("version");
361 if (version == null)
363 version = "1.0";
365 out.write(BRA);
366 out.write(0x3f);
367 out.write("xml version='".getBytes("US-ASCII"));
368 out.write(version.getBytes("US-ASCII"));
369 out.write(APOS);
370 if (!("UTF-8".equalsIgnoreCase(encoding)))
372 out.write(" encoding='".getBytes("US-ASCII"));
373 out.write(encoding.getBytes("US-ASCII"));
374 out.write(APOS);
376 if ((doc != null && doc.getXmlStandalone()) ||
377 "yes".equals(node.getUserData("standalone")))
379 out.write(" standalone='yes'".getBytes("US-ASCII"));
381 out.write(0x3f);
382 out.write(KET);
383 out.write(encodeText(eol));
385 // TODO warn if not outputting the declaration would be a
386 // problem
388 else if (mode == Stylesheet.OUTPUT_HTML)
390 // Ensure that encoding is accessible
391 String mediaType = (String) node.getUserData("media-type");
392 if (mediaType == null)
394 mediaType = "text/html";
396 String contentType = mediaType + "; charset=" +
397 ((encoding.indexOf(' ') != -1) ?
398 "\"" + encoding + "\"" :
399 encoding);
400 Document doc = (node instanceof Document) ? (Document) node :
401 node.getOwnerDocument();
402 Node html = null;
403 for (Node ctx = node.getFirstChild(); ctx != null;
404 ctx = ctx.getNextSibling())
406 if (ctx.getNodeType() == Node.ELEMENT_NODE)
408 html = ctx;
409 break;
412 if (html == null)
414 html = doc.createElement("html");
415 node.appendChild(html);
417 Node head = null;
418 for (Node ctx = html.getFirstChild(); ctx != null;
419 ctx = ctx.getNextSibling())
421 if (ctx.getNodeType() == Node.ELEMENT_NODE)
423 String name = ctx.getLocalName();
424 if (name == null)
426 name = ctx.getNodeName();
428 if ("head".equalsIgnoreCase(name))
430 head = ctx;
431 break;
435 if (head == null)
437 head = doc.createElement("head");
438 Node c1 = null;
439 for (Node ctx = html.getFirstChild(); ctx != null;
440 ctx = ctx.getNextSibling())
442 if (ctx.getNodeType() == Node.ELEMENT_NODE)
444 c1 = ctx;
445 break;
448 if (c1 != null)
450 html.insertBefore(head, c1);
452 else
454 html.appendChild(head);
457 Node meta = null;
458 Node metaContent = null;
459 for (Node ctx = head.getFirstChild(); ctx != null;
460 ctx = ctx.getNextSibling())
462 if (ctx.getNodeType() == Node.ELEMENT_NODE)
464 String name = ctx.getLocalName();
465 if (name == null)
467 name = ctx.getNodeName();
469 if ("meta".equalsIgnoreCase(name))
471 NamedNodeMap metaAttrs = ctx.getAttributes();
472 int len = metaAttrs.getLength();
473 String httpEquiv = null;
474 Node content = null;
475 for (int i = 0; i < len; i++)
477 Node attr = metaAttrs.item(i);
478 String attrName = attr.getNodeName();
479 if ("http-equiv".equalsIgnoreCase(attrName))
481 httpEquiv = attr.getNodeValue();
483 else if ("content".equalsIgnoreCase(attrName))
485 content = attr;
488 if ("Content-Type".equalsIgnoreCase(httpEquiv))
490 meta = ctx;
491 metaContent = content;
492 break;
497 if (meta == null)
499 meta = doc.createElement("meta");
500 // Insert first
501 Node first = head.getFirstChild();
502 if (first == null)
504 head.appendChild(meta);
506 else
508 head.insertBefore(meta, first);
510 Node metaHttpEquiv = doc.createAttribute("http-equiv");
511 meta.getAttributes().setNamedItem(metaHttpEquiv);
512 metaHttpEquiv.setNodeValue("Content-Type");
514 if (metaContent == null)
516 metaContent = doc.createAttribute("content");
517 meta.getAttributes().setNamedItem(metaContent);
519 metaContent.setNodeValue(contentType);
520 // phew
522 children = node.getFirstChild();
523 if (children != null)
525 serialize(children, out, convertToCdata);
527 break;
528 case Node.DOCUMENT_TYPE_NODE:
529 DocumentType doctype = (DocumentType) node;
530 out.write(BRA);
531 out.write(BANG);
532 out.write(encodeText("DOCTYPE "));
533 value = doctype.getNodeName();
534 out.write(encodeText(value));
535 String publicId = doctype.getPublicId();
536 if (publicId != null)
538 out.write(encodeText(" PUBLIC "));
539 out.write(APOS);
540 out.write(encodeText(publicId));
541 out.write(APOS);
543 String systemId = doctype.getSystemId();
544 if (systemId != null)
546 out.write(encodeText(" SYSTEM "));
547 out.write(APOS);
548 out.write(encodeText(systemId));
549 out.write(APOS);
551 String internalSubset = doctype.getInternalSubset();
552 if (internalSubset != null)
554 out.write(encodeText(internalSubset));
556 out.write(KET);
557 out.write(eol.getBytes(encoding));
558 break;
559 case Node.ENTITY_REFERENCE_NODE:
560 value = "&" + node.getNodeValue() + ";";
561 out.write(encodeText(value));
562 break;
563 case Node.PROCESSING_INSTRUCTION_NODE:
564 value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>";
565 out.write(encodeText(value));
566 Node pp = node.getParentNode();
567 if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE)
569 out.write(encodeText(eol));
571 break;
573 if (defined)
575 undefine(uri);
579 boolean isDefined(String uri)
581 return XMLConstants.XML_NS_URI.equals(uri) ||
582 XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
583 namespaces.containsKey(uri);
586 String define(String uri, String prefix)
588 while (namespaces.containsValue(prefix))
590 // Fabricate new prefix
591 prefix = prefix + "_";
593 namespaces.put(uri, prefix);
594 return prefix;
597 void undefine(String uri)
599 namespaces.remove(uri);
602 final byte[] encodeText(String text)
603 throws IOException
605 encoder.reset();
606 if (!encoder.canEncode(text))
608 // Check each character
609 StringBuffer buf = new StringBuffer();
610 int len = text.length();
611 for (int i = 0; i < len; i++)
613 char c = text.charAt(i);
614 if (encoder.canEncode(c))
616 buf.append(c);
618 else
620 // Replace with character entity reference
621 String hex = Integer.toHexString((int) c);
622 buf.append("&#x");
623 buf.append(hex);
624 buf.append(';');
627 text = buf.toString();
629 ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
630 int len = encoded.limit() - encoded.position();
631 if (encoded.hasArray())
633 byte[] ret = encoded.array();
634 if (ret.length > len)
636 // Why?
637 byte[] ret2 = new byte[len];
638 System.arraycopy(ret, 0, ret2, 0, len);
639 ret = ret2;
641 return ret;
643 encoded.flip();
644 byte[] ret = new byte[len];
645 encoded.get(ret, 0, len);
646 return ret;
649 String encode(String text, boolean encodeCtl, boolean inAttr)
651 int len = text.length();
652 StringBuffer buf = null;
653 for (int i = 0; i < len; i++)
655 char c = text.charAt(i);
656 if (c == '<')
658 if (buf == null)
660 buf = new StringBuffer(text.substring(0, i));
662 buf.append("&lt;");
664 else if (c == '>')
666 if (buf == null)
668 buf = new StringBuffer(text.substring(0, i));
670 buf.append("&gt;");
672 else if (c == '&')
674 if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len &&
675 text.charAt(i + 1) == '{')
677 if (buf != null)
679 buf.append(c);
682 else
684 if (buf == null)
686 buf = new StringBuffer(text.substring(0, i));
688 buf.append("&amp;");
691 else if (c == '\'' && inAttr)
693 if (buf == null)
695 buf = new StringBuffer(text.substring(0, i));
697 if (mode == Stylesheet.OUTPUT_HTML)
698 // HTML does not define &apos;, use character entity ref
699 buf.append("&#x27;");
700 else
701 buf.append("&apos;");
703 else if (c == '"' && inAttr)
705 if (buf == null)
707 buf = new StringBuffer(text.substring(0, i));
709 buf.append("&quot;");
711 else if (encodeCtl)
713 if (c < 0x20)
715 if (buf == null)
717 buf = new StringBuffer(text.substring(0, i));
719 buf.append('&');
720 buf.append('#');
721 buf.append((int) c);
722 buf.append(';');
724 else if (buf != null)
726 buf.append(c);
729 else if (buf != null)
731 buf.append(c);
734 return (buf == null) ? text : buf.toString();
737 String toString(Node node)
739 ByteArrayOutputStream out = new ByteArrayOutputStream();
742 serialize(node, out);
743 return new String(out.toByteArray(), encoding);
745 catch (IOException e)
747 throw new RuntimeException(e.getMessage());
751 boolean isHTMLBoolean(Attr attr, String attrName)
753 attrName = attrName.toLowerCase();
754 Node element = attr.getOwnerElement();
755 String elementName = element.getLocalName();
756 if (elementName == null)
758 elementName = element.getNodeName();
760 elementName = elementName.toLowerCase();
761 Collection attributes =
762 (Collection) HTML_BOOLEAN_ATTRIBUTES.get(elementName);
763 return (attributes != null && attributes.contains(attrName));