src/java/org/apache/poi/hssf/record/RecordInputStream.java

   1
   2 /* ====================================================================
   3    Licensed to the Apache Software Foundation (ASF) under one or more
   4    contributor license agreements.  See the NOTICE file distributed with
   5    this work for additional information regarding copyright ownership.
   6    The ASF licenses this file to You under the Apache License, Version 2.0
   7    (the "License"); you may not use this file except in compliance with
   8    the License.  You may obtain a copy of the License at
   9
  10        http://www.apache.org/licenses/LICENSE-2.0
  11
  12    Unless required by applicable law or agreed to in writing, software
  13    distributed under the License is distributed on an "AS IS" BASIS,
  14    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15    See the License for the specific language governing permissions and
  16    limitations under the License.
  17 ==================================================================== */
  18
  19
  20 package org.apache.poi.hssf.record;
  21
  22 import org.apache.poi.util.LittleEndian;
  23
  24 import java.io.IOException;
  25 import java.io.InputStream;
  26 import java.io.ByteArrayOutputStream;
  27
  28 /**
  29  * Title:  Record Input Stream<P>
  30  * Description:  Wraps a stream and provides helper methods for the construction of records.<P>
  31  *
  32  * @author Jason Height (jheight @ apache dot org)
  33  */
  34
  35 public class RecordInputStream extends InputStream
  36 {
  37   /** Maximum size of a single record (minus the 4 byte header) without a continue*/
  38   public final static short MAX_RECORD_DATA_SIZE = 8224;
  39
  40   private InputStream in;
  41   protected short currentSid;
  42   protected short currentLength = -1;
  43   protected short nextSid = -1;
  44
  45   protected byte[] data = new byte[MAX_RECORD_DATA_SIZE];
  46   protected short recordOffset;
  47   protected long pos;
  48
  49   private boolean autoContinue = true;
  50
  51   public RecordInputStream(InputStream in) throws RecordFormatException  {
  52     this.in = in;
  53     try {
  54       nextSid = LittleEndian.readShort(in);
  55       //Dont increment the pos just yet (technically we are at the start of
  56       //the record stream until nextRecord is called).
  57     } catch (IOException ex) {
  58       throw new RecordFormatException("Error reading bytes", ex);
  59     }
  60   }
  61
  62   /** This method will read a byte from the current record*/
  63   public int read() throws IOException {
  64     checkRecordPosition();
  65
  66     byte result = data[recordOffset];
  67     recordOffset += 1;
  68     pos += 1;
  69     return result;
  70   }
  71
  72   public short getSid() {
  73     return currentSid;
  74   }
  75
  76   public short getLength() {
  77     return currentLength;
  78   }
  79
  80   public short getRecordOffset() {
  81     return recordOffset;
  82   }
  83
  84   public long getPos() {
  85     return pos;
  86   }
  87
  88   public boolean hasNextRecord() {
  89     return (nextSid != 0);
  90   }
  91
  92   /** Moves to the next record in the stream.
  93    *
  94    * <i>Note: The auto continue flag is reset to true</i>
  95    */
  96
  97   public void nextRecord() throws RecordFormatException {
  98     if ((currentLength != -1) && (currentLength != recordOffset)) {
  99       System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(currentSid));
 100     }
 101     currentSid = nextSid;
 102     pos += LittleEndian.SHORT_SIZE;
 103     autoContinue = true;
 104     try {
 105       recordOffset = 0;
 106       currentLength = LittleEndian.readShort(in);
 107       if (currentLength > MAX_RECORD_DATA_SIZE)
 108         throw new RecordFormatException("The content of an excel record cannot exceed "+MAX_RECORD_DATA_SIZE+" bytes");
 109       pos += LittleEndian.SHORT_SIZE;
 110       in.read(data, 0, currentLength);
 111
 112       //Read the Sid of the next record
 113       nextSid = LittleEndian.readShort(in);
 114     } catch (IOException ex) {
 115       throw new RecordFormatException("Error reading bytes", ex);
 116     }
 117   }
 118
 119   public void setAutoContinue(boolean enable) {
 120     this.autoContinue = enable;
 121   }
 122
 123   public boolean getAutoContinue() {
 124     return autoContinue;
 125   }
 126
 127   protected void checkRecordPosition() {
 128     if (remaining() <= 0) {
 129       if (isContinueNext() && autoContinue) {
 130         nextRecord();
 131       }
 132       else throw new ArrayIndexOutOfBoundsException();
 133     }
 134   }
 135
 136   /**
 137    * Reads an 8 bit, signed value
 138    */
 139   public byte readByte() {
 140     checkRecordPosition();
 141
 142     byte result = data[recordOffset];
 143     recordOffset += 1;
 144     pos += 1;
 145     return result;
 146   }
 147
 148   /**
 149    * Reads a 16 bit, signed value
 150    */
 151   public short readShort() {
 152     checkRecordPosition();
 153
 154     short result = LittleEndian.getShort(data, recordOffset);
 155     recordOffset += LittleEndian.SHORT_SIZE;
 156     pos += LittleEndian.SHORT_SIZE;
 157     return result;
 158   }
 159
 160   public int readInt() {
 161     checkRecordPosition();
 162
 163     int result = LittleEndian.getInt(data, recordOffset);
 164     recordOffset += LittleEndian.INT_SIZE;
 165     pos += LittleEndian.INT_SIZE;
 166     return result;
 167   }
 168
 169   public long readLong() {
 170     checkRecordPosition();
 171
 172     long result = LittleEndian.getLong(data, recordOffset);
 173     recordOffset += LittleEndian.LONG_SIZE;
 174     pos += LittleEndian.LONG_SIZE;
 175     return result;
 176   }
 177
 178   /**
 179    * Reads an 8 bit, unsigned value
 180    */
 181   public short readUByte() {
 182           short s = readByte();
 183           if(s < 0) {
 184                   s += 256;
 185           }
 186           return s;
 187   }
 188
 189   /**
 190    * Reads a 16 bit,un- signed value.
 191    * @return
 192    */
 193   public int readUShort() {
 194     checkRecordPosition();
 195
 196     int result = LittleEndian.getUShort(data, recordOffset);
 197     recordOffset += LittleEndian.SHORT_SIZE;
 198     pos += LittleEndian.SHORT_SIZE;
 199     return result;
 200   }
 201
 202   byte[] NAN_data = null;
 203   public double readDouble() {
 204     checkRecordPosition();
 205     //Reset NAN data
 206     NAN_data = null;
 207     double result = LittleEndian.getDouble(data, recordOffset);
 208     //Excel represents NAN in several ways, at this point in time we do not often
 209     //know the sequence of bytes, so as a hack we store the NAN byte sequence
 210     //so that it is not corrupted.
 211     if (Double.isNaN(result)) {
 212       NAN_data = new byte[8];
 213       System.arraycopy(data, recordOffset, NAN_data, 0, 8);
 214     }
 215
 216     recordOffset += LittleEndian.DOUBLE_SIZE;
 217     pos += LittleEndian.DOUBLE_SIZE;
 218     return result;
 219   }
 220
 221   public byte[] getNANData() {
 222     if (NAN_data == null)
 223       throw new RecordFormatException("Do NOT call getNANData without calling readDouble that returns NaN");
 224     return NAN_data;
 225   }
 226
 227   public short[] readShortArray() {
 228     checkRecordPosition();
 229
 230     short[] arr = LittleEndian.getShortArray(data, recordOffset);
 231     final int size = (2 * (arr.length +1));
 232     recordOffset += size;
 233     pos += size;
 234
 235     return arr;
 236   }
 237
 238   /**
 239    *  given a byte array of 16-bit unicode characters, compress to 8-bit and
 240    *  return a string
 241    *
 242    * { 0x16, 0x00 } -0x16
 243    *
 244    * @param length the length of the final string
 245    * @return                                     the converted string
 246    * @exception  IllegalArgumentException        if len is too large (i.e.,
 247    *      there is not enough data in string to create a String of that
 248    *      length)
 249    */
 250   public String readUnicodeLEString(int length) {
 251     if ((length < 0) || (((remaining() / 2) < length) && !isContinueNext())) {
 252             throw new IllegalArgumentException("Illegal length - asked for " + length + " but only " + (remaining()/2) + " left!");
 253     }
 254
 255     StringBuffer buf = new StringBuffer(length);
 256     for (int i=0;i<length;i++) {
 257       if ((remaining() == 0) && (isContinueNext())){
 258         nextRecord();
 259         int compressByte = readByte();
 260         if(compressByte != 1) throw new IllegalArgumentException("compressByte in continue records must be 1 while reading unicode LE string");
 261       }
 262       char ch = (char)readShort();
 263       buf.append(ch);
 264     }
 265     return buf.toString();
 266   }
 267
 268   public String readCompressedUnicode(int length) {
 269     if ((length < 0) || ((remaining() < length) && !isContinueNext())) {
 270             throw new IllegalArgumentException("Illegal length " + length);
 271     }
 272
 273     StringBuffer buf = new StringBuffer(length);
 274     for (int i=0;i<length;i++) {
 275       if ((remaining() == 0) && (isContinueNext())) {
 276         nextRecord();
 277           int compressByte = readByte();
 278           if(compressByte != 0) throw new IllegalArgumentException("compressByte in continue records must be 0 while reading compressed unicode");
 279       }
 280       byte b = readByte();
 281       //Typecast direct to char from byte with high bit set causes all ones
 282       //in the high byte of the char (which is of course incorrect)
 283       char ch = (char)( (short)0xff & (short)b );
 284       buf.append(ch);
 285     }
 286     return buf.toString();
 287   }
 288
 289   /** Returns an excel style unicode string from the bytes reminaing in the record.
 290    * <i>Note:</i> Unicode strings differ from <b>normal</b> strings due to the addition of
 291    * formatting information.
 292    *
 293    * @return The unicode string representation of the remaining bytes.
 294    */
 295   public UnicodeString readUnicodeString() {
 296     return new UnicodeString(this);
 297   }
 298
 299   /** Returns the remaining bytes for the current record.
 300    *
 301    * @return The remaining bytes of the current record.
 302    */
 303   public byte[] readRemainder() {
 304     int size = remaining();
 305     byte[] result = new byte[size];
 306     System.arraycopy(data, recordOffset, result, 0, size);
 307     recordOffset += size;
 308     pos += size;
 309     return result;
 310   }
 311
 312   /** Reads all byte data for the current record, including any
 313    *  that overlaps into any following continue records.
 314    *
 315    *  @deprecated Best to write a input stream that wraps this one where there is
 316    *  special sub record that may overlap continue records.
 317    */
 318   public byte[] readAllContinuedRemainder() {
 319     //Using a ByteArrayOutputStream is just an easy way to get a
 320     //growable array of the data.
 321     ByteArrayOutputStream out = new ByteArrayOutputStream(2*MAX_RECORD_DATA_SIZE);
 322
 323     while (isContinueNext()) {
 324       byte[] b = readRemainder();
 325       out.write(b, 0, b.length);
 326       nextRecord();
 327     }
 328     byte[] b = readRemainder();
 329     out.write(b, 0, b.length);
 330
 331     return out.toByteArray();
 332   }
 333
 334   /** The remaining number of bytes in the <i>current</i> record.
 335    *
 336    * @return The number of bytes remaining in the current record
 337    */
 338   public int remaining() {
 339     return (currentLength - recordOffset);
 340   }
 341
 342   /** Returns true iif a Continue record is next in the excel stream
 343    *
 344    * @return True when a ContinueRecord is next.
 345    */
 346   public boolean isContinueNext() {
 347     return (nextSid == ContinueRecord.sid);
 348   }
 349 }