net/filter/gzip_filter.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "net/filter/gzip_filter.h"
   6
   7 #include "base/logging.h"
   8 #include "net/filter/gzip_header.h"
   9 #include "third_party/zlib/zlib.h"
  10
  11 namespace net {
  12
  13 GZipFilter::GZipFilter(FilterType type)
  14     : Filter(type),
  15       decoding_status_(DECODING_UNINITIALIZED),
  16       decoding_mode_(DECODE_MODE_UNKNOWN),
  17       gzip_header_status_(GZIP_CHECK_HEADER_IN_PROGRESS),
  18       zlib_header_added_(false),
  19       gzip_footer_bytes_(0),
  20       possible_sdch_pass_through_(false) {
  21 }
  22
  23 GZipFilter::~GZipFilter() {
  24   if (decoding_status_ != DECODING_UNINITIALIZED) {
  25     inflateEnd(zlib_stream_.get());
  26   }
  27 }
  28
  29 bool GZipFilter::InitDecoding(Filter::FilterType filter_type) {
  30   if (decoding_status_ != DECODING_UNINITIALIZED)
  31     return false;
  32
  33   // Initialize zlib control block
  34   zlib_stream_.reset(new z_stream);
  35   if (!zlib_stream_.get())
  36     return false;
  37   memset(zlib_stream_.get(), 0, sizeof(z_stream));
  38
  39   // Set decoding mode
  40   switch (filter_type) {
  41     case Filter::FILTER_TYPE_DEFLATE: {
  42       if (inflateInit(zlib_stream_.get()) != Z_OK)
  43         return false;
  44       decoding_mode_ = DECODE_MODE_DEFLATE;
  45       break;
  46     }
  47     case Filter::FILTER_TYPE_GZIP_HELPING_SDCH:
  48       possible_sdch_pass_through_ =  true;  // Needed to optionally help sdch.
  49       // Fall through to GZIP case.
  50     case Filter::FILTER_TYPE_GZIP: {
  51       gzip_header_.reset(new GZipHeader());
  52       if (!gzip_header_.get())
  53         return false;
  54       if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)
  55         return false;
  56       decoding_mode_ = DECODE_MODE_GZIP;
  57       break;
  58     }
  59     default: {
  60       return false;
  61     }
  62   }
  63
  64   decoding_status_ = DECODING_IN_PROGRESS;
  65   return true;
  66 }
  67
  68 Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer,
  69                                                   int* dest_len) {
  70   if (!dest_buffer || !dest_len || *dest_len <= 0)
  71     return Filter::FILTER_ERROR;
  72
  73   if (decoding_status_ == DECODING_DONE) {
  74     if (GZIP_GET_INVALID_HEADER != gzip_header_status_)
  75       SkipGZipFooter();
  76     // Some server might send extra data after the gzip footer. We just copy
  77     // them out. Mozilla does this too.
  78     return CopyOut(dest_buffer, dest_len);
  79   }
  80
  81   if (decoding_status_ != DECODING_IN_PROGRESS)
  82     return Filter::FILTER_ERROR;
  83
  84   Filter::FilterStatus status;
  85
  86   if (decoding_mode_ == DECODE_MODE_GZIP &&
  87       gzip_header_status_ == GZIP_CHECK_HEADER_IN_PROGRESS) {
  88     // With gzip encoding the content is wrapped with a gzip header.
  89     // We need to parse and verify the header first.
  90     status = CheckGZipHeader();
  91     switch (status) {
  92       case Filter::FILTER_NEED_MORE_DATA: {
  93         // We have consumed all input data, either getting a complete header or
  94         // a partial header. Return now to get more data.
  95         *dest_len = 0;
  96         // Partial header means it can't be an SDCH header.
  97         // Reason: SDCH *always* starts with 8 printable characters [a-zA-Z/_].
  98         // Gzip always starts with two non-printable characters.  Hence even a
  99         // single character (partial header) means that this can't be an SDCH
 100         // encoded body masquerading as a GZIP body.
 101         possible_sdch_pass_through_ = false;
 102         return status;
 103       }
 104       case Filter::FILTER_OK: {
 105         // The header checking succeeds, and there are more data in the input.
 106         // We must have got a complete header here.
 107         DCHECK_EQ(gzip_header_status_, GZIP_GET_COMPLETE_HEADER);
 108         break;
 109       }
 110       case Filter::FILTER_ERROR: {
 111         if (possible_sdch_pass_through_ &&
 112             GZIP_GET_INVALID_HEADER == gzip_header_status_) {
 113           decoding_status_ = DECODING_DONE;  // Become a pass through filter.
 114           return CopyOut(dest_buffer, dest_len);
 115         }
 116         decoding_status_ = DECODING_ERROR;
 117         return status;
 118       }
 119       default: {
 120         status = Filter::FILTER_ERROR;    // Unexpected.
 121         decoding_status_ = DECODING_ERROR;
 122         return status;
 123       }
 124     }
 125   }
 126
 127   int dest_orig_size = *dest_len;
 128   status = DoInflate(dest_buffer, dest_len);
 129
 130   if (decoding_mode_ == DECODE_MODE_DEFLATE && status == Filter::FILTER_ERROR) {
 131     // As noted in Mozilla implementation, some servers such as Apache with
 132     // mod_deflate don't generate zlib headers.
 133     // See 677409 for instances where this work around is needed.
 134     // Insert a dummy zlib header and try again.
 135     if (InsertZlibHeader()) {
 136       *dest_len = dest_orig_size;
 137       status = DoInflate(dest_buffer, dest_len);
 138     }
 139   }
 140
 141   if (status == Filter::FILTER_DONE) {
 142     decoding_status_ = DECODING_DONE;
 143   } else if (status == Filter::FILTER_ERROR) {
 144     decoding_status_ = DECODING_ERROR;
 145   }
 146
 147   return status;
 148 }
 149
 150 Filter::FilterStatus GZipFilter::CheckGZipHeader() {
 151   DCHECK_EQ(gzip_header_status_, GZIP_CHECK_HEADER_IN_PROGRESS);
 152
 153   // Check input data in pre-filter buffer.
 154   if (!next_stream_data_ || stream_data_len_ <= 0)
 155     return Filter::FILTER_ERROR;
 156
 157   const char* header_end = NULL;
 158   GZipHeader::Status header_status;
 159   header_status = gzip_header_->ReadMore(next_stream_data_, stream_data_len_,
 160                                          &header_end);
 161
 162   switch (header_status) {
 163     case GZipHeader::INCOMPLETE_HEADER: {
 164       // We read all the data but only got a partial header.
 165       next_stream_data_ = NULL;
 166       stream_data_len_ = 0;
 167       return Filter::FILTER_NEED_MORE_DATA;
 168     }
 169     case GZipHeader::COMPLETE_HEADER: {
 170       // We have a complete header. Check whether there are more data.
 171       int num_chars_left = static_cast<int>(stream_data_len_ -
 172                                             (header_end - next_stream_data_));
 173       gzip_header_status_ = GZIP_GET_COMPLETE_HEADER;
 174
 175       if (num_chars_left > 0) {
 176         next_stream_data_ = const_cast<char*>(header_end);
 177         stream_data_len_ = num_chars_left;
 178         return Filter::FILTER_OK;
 179       } else {
 180         next_stream_data_ = NULL;
 181         stream_data_len_ = 0;
 182         return Filter::FILTER_NEED_MORE_DATA;
 183       }
 184     }
 185     case GZipHeader::INVALID_HEADER: {
 186       gzip_header_status_ = GZIP_GET_INVALID_HEADER;
 187       return Filter::FILTER_ERROR;
 188     }
 189     default: {
 190       break;
 191     }
 192   }
 193
 194   return Filter::FILTER_ERROR;
 195 }
 196
 197 Filter::FilterStatus GZipFilter::DoInflate(char* dest_buffer, int* dest_len) {
 198   // Make sure we have both valid input data and output buffer.
 199   if (!dest_buffer || !dest_len || *dest_len <= 0)  // output
 200     return Filter::FILTER_ERROR;
 201
 202   if (!next_stream_data_ || stream_data_len_ <= 0) {  // input
 203     *dest_len = 0;
 204     return Filter::FILTER_NEED_MORE_DATA;
 205   }
 206
 207   // Fill in zlib control block
 208   zlib_stream_.get()->next_in = bit_cast<Bytef*>(next_stream_data_);
 209   zlib_stream_.get()->avail_in = stream_data_len_;
 210   zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer);
 211   zlib_stream_.get()->avail_out = *dest_len;
 212
 213   int inflate_code = inflate(zlib_stream_.get(), Z_NO_FLUSH);
 214   int bytesWritten = *dest_len - zlib_stream_.get()->avail_out;
 215
 216   Filter::FilterStatus status;
 217
 218   switch (inflate_code) {
 219     case Z_STREAM_END: {
 220       *dest_len = bytesWritten;
 221
 222       stream_data_len_ = zlib_stream_.get()->avail_in;
 223       next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in);
 224
 225       SkipGZipFooter();
 226
 227       status = Filter::FILTER_DONE;
 228       break;
 229     }
 230     case Z_BUF_ERROR: {
 231       // According to zlib documentation, when calling inflate with Z_NO_FLUSH,
 232       // getting Z_BUF_ERROR means no progress is possible. Neither processing
 233       // more input nor producing more output can be done.
 234       // Since we have checked both input data and output buffer before calling
 235       // inflate, this result is unexpected.
 236       status = Filter::FILTER_ERROR;
 237       break;
 238     }
 239     case Z_OK: {
 240       // Some progress has been made (more input processed or more output
 241       // produced).
 242       *dest_len = bytesWritten;
 243
 244       // Check whether we have consumed all input data.
 245       stream_data_len_ = zlib_stream_.get()->avail_in;
 246       if (stream_data_len_ == 0) {
 247         next_stream_data_ = NULL;
 248         status = Filter::FILTER_NEED_MORE_DATA;
 249       } else {
 250         next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in);
 251         status = Filter::FILTER_OK;
 252       }
 253       break;
 254     }
 255     default: {
 256       status = Filter::FILTER_ERROR;
 257       break;
 258     }
 259   }
 260
 261   return status;
 262 }
 263
 264 bool GZipFilter::InsertZlibHeader() {
 265   static char dummy_head[2] = { 0x78, 0x1 };
 266
 267   char dummy_output[4];
 268
 269   // We only try add additional header once.
 270   if (zlib_header_added_)
 271     return false;
 272
 273   inflateReset(zlib_stream_.get());
 274   zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_head[0]);
 275   zlib_stream_.get()->avail_in = sizeof(dummy_head);
 276   zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
 277   zlib_stream_.get()->avail_out = sizeof(dummy_output);
 278
 279   int code = inflate(zlib_stream_.get(), Z_NO_FLUSH);
 280   zlib_header_added_ = true;
 281
 282   return (code == Z_OK);
 283 }
 284
 285
 286 void GZipFilter::SkipGZipFooter() {
 287   int footer_bytes_expected = kGZipFooterSize - gzip_footer_bytes_;
 288   if (footer_bytes_expected > 0) {
 289     int footer_byte_avail = std::min(footer_bytes_expected, stream_data_len_);
 290     stream_data_len_ -= footer_byte_avail;
 291     next_stream_data_ += footer_byte_avail;
 292     gzip_footer_bytes_ += footer_byte_avail;
 293
 294     if (stream_data_len_ == 0)
 295       next_stream_data_ = NULL;
 296   }
 297 }
 298
 299 }  // namespace net