From de15c5a51291cae19dfad26149f00b2b836edfb3 Mon Sep 17 00:00:00 2001 From: David Fifield Date: Mon, 2 Aug 2021 14:59:24 -0600 Subject: [PATCH] Performance tuning: MaxStreamBuffer, SetWindowSize, QueueSize. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This enlarges a few buffers and windows, with the goal of improving download performance. kcp's SetWindowSize controls the number of unacknowledged packets that are allowed. smux's MaxStreamBuffer is another kind of "receive window" that advertises to the peer how much we are willing to receive at once. The default MaxStreamBuffer is 64 KB, but kcptun overrides the default to 2 MB. turbotunnel's QueueSize is the size of internal buffers in QueuePacketConn and RemoteMap; empirically I found that the server would sometimes fill its outgoing buffer if SetWindowSize and QueueSize were equal, so I set QueueSize to be twice SetWindowSize. https://lists.torproject.org/pipermail/anti-censorship-team/2021-July/000178.html https://gitlab.torproject.org/tpo/anti-censorship/pluggable-transports/snowflake/-/merge_requests/48 The changes have a large effect on a direct -udp connection without a recursive resolver—which, however, is a discouraged configuration. Through a recursive resolver, the improvements are more modest. If I really crank up the buffer sizes, I can get surprisingly fast downloads over a direct -udp connection (over 1 MB/s), but a connection through a resolver doesn't keep getting faster and may even get slower. I want to avoid a bufferbloat situation with oversized buffers, too. I manually explored a small neighborhood of parameter values and picked some settings that looked reasonable. The tables below show the test results. The test is downloading 10 MiB between two servers with 100 ms RTT between them. 
Server: dnstt-server -udp :53 -privkey-file server.key t.example.com 127.0.0.1:9321 ncat -l -k -v 9321 --send-only --sh-exec 'dd bs=1M count=10 if=/dev/urandom' Client: dnstt-client -pubkey-file server.pub t.example.com 127.0.0.1:7000 ncat --recv-only 127.0.0.1 7000 | pv -t -r -a -b -i 0.2 > /dev/null I did the download under every treatment twice and recorded the download rate in KiB/s. "Server drops" comes from hacking some log messages into turbotunnel.QueuePacketConn to track how often the "Drop the incoming packet" (QueueIncoming method) and "Drop the outgoing packet" (WriteTo) cases happen. resolver method QueueSize MaxStreamBuffer SetWindowSize KiB/s KiB/s -------- ------ --------- --------------- ------------- ----- ----- direct udp 64 64*1024 (32, 32) 169 173 (status before this commit) dns.google udp 64 64*1024 (32, 32) 63.8 64.3 (status before this commit) dns.google doh 64 64*1024 (32, 32) 125 122 (status before this commit) resolver method QueueSize MaxStreamBuffer SetWindowSize KiB/s KiB/s -------- ------ --------- --------------- ------------- ----- ----- direct udp 64 1*1024*1024 (32, 32) 172 174 dns.google udp 64 1*1024*1024 (32, 32) 57.3 58.4 server drops dns.google doh 64 1*1024*1024 (32, 32) 128 128 resolver method QueueSize MaxStreamBuffer SetWindowSize KiB/s KiB/s -------- ------ --------- --------------- ------------- ----- ----- direct udp 64 1*1024*1024 (64, 64) 322 305 dns.google udp 64 1*1024*1024 (64, 64) 72.5 70.9 server drops dns.google doh 64 1*1024*1024 (64, 64) 136 139 server drops resolver method QueueSize MaxStreamBuffer SetWindowSize KiB/s KiB/s -------- ------ --------- --------------- ------------- ----- ----- direct udp 128 1*1024*1024 (64, 64) 321 325 (this commit) dns.google udp 128 1*1024*1024 (64, 64) 82.5 78.5 (this commit) dns.google doh 128 1*1024*1024 (64, 64) 129 131 (this commit) resolver method QueueSize MaxStreamBuffer SetWindowSize KiB/s KiB/s -------- ------ --------- --------------- ------------- ----- ----- 
direct udp 2048 4*1024*1024 (1024, 1024) 1240 1060 server drops dns.google udp 2048 4*1024*1024 (1024, 1024) 73.5 81.4 dns.google doh 2048 4*1024*1024 (1024, 1024) 115 129 --- dnstt-client/main.go | 2 ++ dnstt-server/main.go | 2 ++ turbotunnel/consts.go | 4 +++- turbotunnel/queuepacketconn.go | 2 +- turbotunnel/remotemap.go | 2 +- 5 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dnstt-client/main.go b/dnstt-client/main.go index a2e8f77..1b4e672 100644 --- a/dnstt-client/main.go +++ b/dnstt-client/main.go @@ -153,6 +153,7 @@ func run(pubkey []byte, domain dns.Name, localAddr *net.TCPAddr, remoteAddr net. 0, // default resend 1, // nc=1 => congestion window off ) + conn.SetWindowSize(turbotunnel.QueueSize/2, turbotunnel.QueueSize/2) if rc := conn.SetMtu(mtu); !rc { panic(rc) } @@ -167,6 +168,7 @@ func run(pubkey []byte, domain dns.Name, localAddr *net.TCPAddr, remoteAddr net. smuxConfig := smux.DefaultConfig() smuxConfig.Version = 2 smuxConfig.KeepAliveTimeout = idleTimeout + smuxConfig.MaxStreamBuffer = 1 * 1024 * 1024 // default is 65536 sess, err := smux.Client(rw, smuxConfig) if err != nil { return fmt.Errorf("opening smux session: %v", err) diff --git a/dnstt-server/main.go b/dnstt-server/main.go index 8771fef..35a997e 100644 --- a/dnstt-server/main.go +++ b/dnstt-server/main.go @@ -241,6 +241,7 @@ func acceptStreams(conn *kcp.UDPSession, privkey []byte, upstream string) error smuxConfig := smux.DefaultConfig() smuxConfig.Version = 2 smuxConfig.KeepAliveTimeout = idleTimeout + smuxConfig.MaxStreamBuffer = 1 * 1024 * 1024 // default is 65536 sess, err := smux.Server(rw, smuxConfig) if err != nil { return err @@ -291,6 +292,7 @@ func acceptSessions(ln *kcp.Listener, privkey []byte, mtu int, upstream string) 0, // default resend 1, // nc=1 => congestion window off ) + conn.SetWindowSize(turbotunnel.QueueSize/2, turbotunnel.QueueSize/2) if rc := conn.SetMtu(mtu); !rc { panic(rc) } diff --git a/turbotunnel/consts.go b/turbotunnel/consts.go index 
db54589..5684bf7 100644 --- a/turbotunnel/consts.go +++ b/turbotunnel/consts.go @@ -6,7 +6,9 @@ package turbotunnel import "errors" -const queueSize = 64 +// QueueSize is the size of send and receive queues in QueuePacketConn and +// RemoteMap. +const QueueSize = 128 var errClosedPacketConn = errors.New("operation on closed connection") var errNotImplemented = errors.New("not implemented") diff --git a/turbotunnel/queuepacketconn.go b/turbotunnel/queuepacketconn.go index eb4df4b..6571de0 100644 --- a/turbotunnel/queuepacketconn.go +++ b/turbotunnel/queuepacketconn.go @@ -48,7 +48,7 @@ func NewQueuePacketConn(localAddr net.Addr, timeout time.Duration) *QueuePacketC return &QueuePacketConn{ remotes: NewRemoteMap(timeout), localAddr: localAddr, - recvQueue: make(chan taggedPacket, queueSize), + recvQueue: make(chan taggedPacket, QueueSize), closed: make(chan struct{}), } } diff --git a/turbotunnel/remotemap.go b/turbotunnel/remotemap.go index c679bfa..a3238e9 100644 --- a/turbotunnel/remotemap.go +++ b/turbotunnel/remotemap.go @@ -127,7 +127,7 @@ func (inner *remoteMapInner) Lookup(addr net.Addr, now time.Time) *remoteRecord record = &remoteRecord{ Addr: addr, LastSeen: now, - SendQueue: make(chan []byte, queueSize), + SendQueue: make(chan []byte, QueueSize), Stash: make(chan []byte, 1), } heap.Push(inner, record) -- 2.11.4.GIT