From ab4fd159815c3de3f32b1cf354a5529eb5cf6c86 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sat, 12 Dec 2009 15:45:33 -0800
Subject: [PATCH] Lmgtwty::IO gets #write_binary and #write_utf8 methods

---
 lib/lmgtwty/io.rb | 197 +++++++++++++++++++++++++++++++++++++-----------------
 test/test_io.rb   |   2 +-
 2 files changed, 135 insertions(+), 64 deletions(-)
 rewrite lib/lmgtwty/io.rb (61%)

diff --git a/lib/lmgtwty/io.rb b/lib/lmgtwty/io.rb
dissimilarity index 61%
index 843c2c1..cdfa93a 100644
--- a/lib/lmgtwty/io.rb
+++ b/lib/lmgtwty/io.rb
@@ -1,63 +1,134 @@
-# -*- encoding: binary -*-
-
-module Lmgtwty
-
-  class IO < Struct.new(:to_io, :buf)
-    # most Web Socket messages from clients are expected to be small text
-    RD_SIZE = 128
-
-    # maximum size of a buffer we'll allow in memory
-    MAX_BUF_SIZE = 1024 * 16
-
-    # Web Sockets enforces UTF-8 when interfacing with the client
-    # Sockets always return strings of Encoding::Binary
-    ENC = defined?(Encoding::UTF_8) ? Encoding::UTF_8 : nil
-
-    Z = ""
-
-    def initialize(io, buf = Z.dup)
-      super
-    end
-
-    # iterates through each message until a client the connection
-    # (raising EOFError) or the passed block breaks/returns
-    def each(&block)
-      yield gets while true
-    end
-
-    def gets
-      rv = raw_gets # this is binary
-      # make sure client gets back UTF-8
-      rv.force_encoding(ENC) unless ENC.nil?
-      rv
-    end
-
-    def read
-      i = to_io
-      # read with no args for Revactor compat
-      i.respond_to?(:readpartial) ? i.readpartial(RD_SIZE) : i.read
-    end
-
-    # retrieves the next record, underlying IO object will raise
-    # EOFError to indicate a closed connection if the client closes
-    def raw_gets
-      begin
-        buf.gsub!(/\A\x00(.*?)\xff/m, Z) and return $1
-        raise ProtocolError, "buffer too large" if buf.size > MAX_BUF_SIZE
-        buf << read
-      end while true
-    end
-
-    def write(buf)
-      unless ENC.nil?
-        # first make sure it's UTF-8
-        buf = buf.to_s.encode(ENC)
-
-        # then make sure it can be forced into a string containing \0 and \xff
-        buf.force_encoding(Encoding::BINARY)
-      end
-      to_io.write("\0#{buf}\xff")
-    end
-
-  end
-end
+# -*- encoding: binary -*-
+
+module Lmgtwty
+
+  class IO < Struct.new(:to_io, :buf)
+    # most Web Socket messages from clients are expected to be small text
+    RD_SIZE = 128
+
+    # maximum size of a UTF-8 buffer we'll allow in memory
+    MAX_UTF8_SIZE = 1024 * 16
+
+    # maximum size of a binary buffer we'll allow in memory
+    MAX_BIN_SIZE = 1024 * 112
+
+    # Web Sockets usually uses UTF-8 when interfacing with the client
+    # Ruby Sockets always return strings of Encoding::Binary
+    ENC = defined?(Encoding::UTF_8) ? Encoding::UTF_8 : nil
+
+    Z = ""
+
+    def initialize(io, buf = Z.dup)
+      super
+    end
+
+    # iterates through each message until a client the connection is closed
+    def each(&block)
+      while str = gets
+        yield str
+      end
+      rescue EOFError
+      self # pretend to be like IO#each
+    end
+
+    # retrieves the next record, returns nil if client closes connection
+    def gets
+      begin
+        unless buf.empty?
+          buf.gsub!(/\A\x00(.*?)\xff/m, Z) and return utf8!($1)
+          rv = read_binary and return rv
+          buf.size > MAX_UTF8_SIZE and
+            raise ProtocolError, "buffer too large #{buf.size}"
+        end
+        buf << read
+      rescue EOFError
+        return
+      end while true
+    end
+
+    def write_utf8(buf)
+      to_io.write("\0#{buf}\xff")
+    end
+
+    def write_binary(buf)
+      buf = binary(buf)
+      n = buf.size
+      length = []
+      begin
+        length.unshift((n % 128) | 0x80)
+      end while (n /= 128) > 0
+      length[-1] ^= 0x80
+
+      to_io.write("\x80#{length.pack("C*")}#{buf}")
+    end
+
+    if ENC.nil?
+      def write(buf)
+        begin
+          buf.unpack("U*")
+        rescue ArgumentError
+          return write_binary(buf)
+        end
+        write_utf8(buf)
+      end
+    else
+      def write(buf)
+        buf.encoding == ENC ? write_utf8(buf) : write_binary(buf)
+      end
+    end
+
+    # :stopdoc:
+    def read(size = nil)
+      i = to_io
+      # read with no args for Revactor compat
+      i.respond_to?(:readpartial) ?
+        i.readpartial(size.nil? ? RD_SIZE : size) :
+        i.read(size.nil? ? nil : size)
+    end
+
+    if ENC.nil?
+      def utf8!(buf); buf; end
+      def binary(buf); buf; end
+    else
+      def utf8!(buf); buf.force_encoding(ENC); end
+      def binary(buf)
+        buf.encoding == Encoding::BINARY ?
+          buf : buf.dup.force_encoding(Encoding::BINARY)
+      end
+    end
+
+    if Z.respond_to?(:ord)
+      def ord(byte_str); byte_str.ord; end
+    else
+      def ord(byte_str); byte_str; end
+    end
+
+    def read_binary
+      (ord(buf[0]) & 0x80) == 0x80 or return
+
+      # I bet my math is wrong *somewhere* in this method..
+
+      i = 1
+      b = length = 0
+      begin
+        buf << read while (b = buf[i]).nil?
+        b = ord(b)
+        length = length * 128 + (b & 0x7f)
+        i += 1
+      end while (b & 0x80) != 0
+
+      length > MAX_BIN_SIZE and
+        raise ProtocolError, "chunk too large: #{length} bytes"
+
+      to_read = length - buf.size + i
+      while to_read > 0
+        buf << (tmp = read)
+        to_read -= tmp.size
+      end
+      rv = buf[i, length]
+      buf.replace(buf[i+length, buf.size])
+      rv
+    end
+
+  end
+end
diff --git a/test/test_io.rb b/test/test_io.rb
index 891701d..0246921 100644
--- a/test/test_io.rb
+++ b/test/test_io.rb
@@ -22,7 +22,7 @@ class TestIO < Test::Unit::TestCase
 
   def test_gets_bad_alignment
     @raw.string << "\x00FOO"
-    assert_raises(EOFError) { @io.gets }
+    assert_nil @io.gets
     @raw.string << "\xff"
     rv = @io.gets
     assert_equal "FOO", rv
-- 
2.11.4.GIT