From dbcbe5d3b1e3eef64c21a3cf44027d4514b84f11 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Wed, 2 Sep 2009 22:15:12 -0700 Subject: [PATCH] TCP - Reduce context switching when handling large send*() or write()'s * Large sends or writes would cause sosend() to sleep and wake up for every TCP segment acknowledged, resulting in massive inefficiencies. To deal with this, if no LOWAT is set we automatically set the LOWAT to 1/2 the send buffer size and adjust the wakeup code. --- sys/kern/uipc_socket.c | 2 ++ sys/kern/uipc_socket2.c | 32 ++++++++++++++++++++++++++++---- sys/sys/socketvar.h | 1 + 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index f6091f3fa1..e6f2ca2260 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1392,11 +1392,13 @@ sosetopt(struct socket *so, struct sockopt *sopt) so->so_snd.ssb_lowat = (optval > so->so_snd.ssb_hiwat) ? so->so_snd.ssb_hiwat : optval; + so->so_snd.ssb_flags &= ~SSB_AUTOLOWAT; break; case SO_RCVLOWAT: so->so_rcv.ssb_lowat = (optval > so->so_rcv.ssb_hiwat) ? 
so->so_rcv.ssb_hiwat : optval; + so->so_rcv.ssb_flags &= ~SSB_AUTOLOWAT; break; } break; diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 094e693b01..c2020b2ec4 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -259,8 +259,10 @@ sonewconn(struct socket *head, int connstatus) so->so_snd.ssb_lowat = head->so_snd.ssb_lowat; so->so_rcv.ssb_timeo = head->so_rcv.ssb_timeo; so->so_snd.ssb_timeo = head->so_snd.ssb_timeo; - so->so_rcv.ssb_flags |= head->so_rcv.ssb_flags & SSB_AUTOSIZE; - so->so_snd.ssb_flags |= head->so_snd.ssb_flags & SSB_AUTOSIZE; + so->so_rcv.ssb_flags |= head->so_rcv.ssb_flags & + (SSB_AUTOSIZE | SSB_AUTOLOWAT); + so->so_snd.ssb_flags |= head->so_snd.ssb_flags & + (SSB_AUTOSIZE | SSB_AUTOLOWAT); if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; @@ -312,6 +314,10 @@ socantrcvmore(struct socket *so) /* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. + * + * For users waiting on send/recv try to avoid unnecessary context switch + * thrashing. Particularly for senders of large buffers (needs to be + * extended to sel and aio? 
XXX) */ void sowakeup(struct socket *so, struct signalsockbuf *ssb) @@ -321,8 +327,14 @@ sowakeup(struct socket *so, struct signalsockbuf *ssb) selwakeup(selinfo); ssb->ssb_flags &= ~SSB_SEL; if (ssb->ssb_flags & SSB_WAIT) { - ssb->ssb_flags &= ~SSB_WAIT; - wakeup((caddr_t)&ssb->ssb_cc); + if ((ssb == &so->so_snd && ssb_space(ssb) >= ssb->ssb_lowat) || + (ssb == &so->so_rcv && ssb->ssb_cc >= ssb->ssb_lowat) || + (ssb == &so->so_snd && (so->so_state & SS_CANTSENDMORE)) || + (ssb == &so->so_rcv && (so->so_state & SS_CANTRCVMORE)) + ) { + ssb->ssb_flags &= ~SSB_WAIT; + wakeup((caddr_t)&ssb->ssb_cc); + } } if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(so->so_sigio, SIGIO, 0); @@ -380,6 +392,8 @@ sowakeup(struct socket *so, struct signalsockbuf *ssb) int soreserve(struct socket *so, u_long sndcc, u_long rcvcc, struct rlimit *rl) { + if (so->so_snd.ssb_lowat == 0) + so->so_snd.ssb_flags |= SSB_AUTOLOWAT; if (ssb_reserve(&so->so_snd, sndcc, so, rl) == 0) goto bad; if (ssb_reserve(&so->so_rcv, rcvcc, so, rl) == 0) @@ -446,6 +460,16 @@ ssb_reserve(struct signalsockbuf *ssb, u_long cc, struct socket *so, ssb->ssb_mbmax = min(cc * sb_efficiency, sb_max); else ssb->ssb_mbmax = cc * sb_efficiency; + + /* + * AUTOLOWAT is set on send buffers and prevents large writes + * from generating a huge number of context switches. 
+ */ + if (ssb->ssb_flags & SSB_AUTOLOWAT) { + ssb->ssb_lowat = ssb->ssb_hiwat / 2; + if (ssb->ssb_lowat < MCLBYTES) + ssb->ssb_lowat = MCLBYTES; + } if (ssb->ssb_lowat > ssb->ssb_hiwat) ssb->ssb_lowat = ssb->ssb_hiwat; return (1); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index aeb97afcbf..d7190bd6d9 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -85,6 +85,7 @@ struct signalsockbuf { #define SSB_MEVENT 0x200 /* need message event notification */ #define SSB_STOP 0x400 /* backpressure indicator */ #define SSB_AUTOSIZE 0x800 /* automatically size socket buffer */ +#define SSB_AUTOLOWAT 0x1000 /* automatically scale lowat */ /* * Per-socket kernel structure. Contains universal send and receive queues, -- 2.11.4.GIT