/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/aio.h> /* for aio_swake proto */
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
/*
 * Function pointer set by the AIO routines so that the socket buffer code
 * can call back into the AIO module if it is loaded.
 */
void	(*aio_swake)(struct socket *, struct sockbuf *);
/*
 * Primitive routines for operating on socket buffers
 */

u_long	sb_max = SB_MAX;
u_long	sb_max_adj =
	SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

static void	sbdrop_internal(struct sockbuf *sb, int len);
static void	sbflush_internal(struct sockbuf *sb);
/*
 * Socantsendmore indicates that no more data will be sent on the socket; it
 * would normally be applied to a socket when the user informs the system
 * that no more data is to be sent, by the protocol code (in the
 * PRU_SHUTDOWN case).  Socantrcvmore indicates that no more data will be
 * received, and will normally be applied to the socket by a protocol when it
 * detects that the peer will send no more data.  Data queued for reading in
 * the socket may yet be read.
 */
void
socantsendmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_snd);

	so->so_snd.sb_state |= SBS_CANTSENDMORE;
	sowwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantsendmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_snd);
	socantsendmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}

void
socantrcvmore_locked(struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
	sorwakeup_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}

void
socantrcvmore(struct socket *so)
{

	SOCKBUF_LOCK(&so->so_rcv);
	socantrcvmore_locked(so);
	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
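/*
 * Example (hypothetical caller): a protocol that learns its peer will send
 * no more data, e.g. on receipt of a TCP FIN, would apply
 *
 *	socantrcvmore(so);
 *
 * after which queued data may still be read, but readers see EOF once the
 * buffer drains.
 */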
/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
int
sbwait(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_flags |= SB_WAIT;
	return (msleep(&sb->sb_cc, &sb->sb_mtx,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
	    sb->sb_timeo));
}
int
sblock(struct sockbuf *sb, int flags)
{

	KASSERT((flags & SBL_VALID) == flags,
	    ("sblock: flags invalid (0x%x)", flags));

	if (flags & SBL_WAIT) {
		if ((sb->sb_flags & SB_NOINTR) ||
		    (flags & SBL_NOINTR)) {
			sx_xlock(&sb->sb_sx);
			return (0);
		}
		return (sx_xlock_sig(&sb->sb_sx));
	} else {
		if (sx_try_xlock(&sb->sb_sx) == 0)
			return (EWOULDBLOCK);
		return (0);
	}
}

void
sbunlock(struct sockbuf *sb)
{

	sx_xunlock(&sb->sb_sx);
}
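/*
 * Example (hypothetical caller): serializing an I/O operation on a socket
 * buffer with the sx-based I/O lock, sleeping interruptibly if another
 * thread holds it:
 *
 *	error = sblock(&so->so_rcv, SBL_WAIT);
 *	if (error)
 *		return (error);
 *	... move data to or from the buffer ...
 *	sbunlock(&so->so_rcv);
 */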
/*
 * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
 * via SIGIO if the socket has the SS_ASYNC flag set.
 *
 * Called with the socket buffer lock held; will release the lock by the end
 * of the function.  This allows the caller to acquire the socket buffer lock
 * while testing for the need for various sorts of wakeup and hold it through
 * to the point where it's no longer required.  We currently hold the lock
 * through calls out to other subsystems (with the exception of kqueue), and
 * then release it to avoid lock order issues.  It's not clear that's
 * correct.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	selwakeuppri(&sb->sb_sel, PSOCK);
	if (!SEL_WAITING(&sb->sb_sel))
		sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_cc);
	}
	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
	SOCKBUF_UNLOCK(sb);
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
	if (sb->sb_flags & SB_AIO)
		aio_swake(so, sb);
	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
}
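/*
 * sowakeup() is normally reached through the sorwakeup()/sowwakeup()
 * wrapper macros (and their _locked variants) from <sys/socketvar.h>
 * rather than called directly; a typical receive-side sequence is:
 *
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	sbappend_locked(&so->so_rcv, m);
 *	sorwakeup_locked(so);
 *
 * where the held lock is passed down and released before return, as
 * described above.
 */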
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and one for
 * receiving data.  Each buffer contains a queue of mbufs, information about
 * the number of mbufs and amount of data in the queue, and other fields
 * allowing select() statements and notification on data availability to be
 * implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.  Each
 * record is a list of mbufs chained together with the m_next field.  Records
 * are chained together with the m_nextpkt field.  The upper level routine
 * soreceive() expects the following conventions to be observed when placing
 * information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's name,
 *    then a record containing that name must be present before any
 *    associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really just
 *    additional data associated with the message), and there are ``rights''
 *    to be received, then a record containing this data should be present
 *    (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by a data
 *    record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space should
 * be released by calling sbrelease() when the socket is destroyed.
 */
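/*
 * For illustration, a datagram receive buffer holding two records, each a
 * sender name followed by data, is linked as follows:
 *
 *	sb_mb -> [MT_SONAME] -m_next-> [MT_DATA] -m_next-> [MT_DATA]
 *	    |
 *	m_nextpkt
 *	    |
 *	    v
 *	[MT_SONAME] -m_next-> [MT_DATA]        <- sb_lastrecord
 */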
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	struct thread *td = curthread;

	SOCKBUF_LOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
		goto bad;
	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
bad2:
	sbrelease_locked(&so->so_snd, so);
bad:
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (ENOBUFS);
}
static int
sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	u_long tmp_sb_max = sb_max;

	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
	if (error || !req->newptr)
		return (error);
	if (tmp_sb_max < MSIZE + MCLBYTES)
		return (EINVAL);
	sb_max = tmp_sb_max;
	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
	return (0);
}
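/*
 * Worked example (assuming the common build-time values MSIZE = 256 and
 * MCLBYTES = 2048): setting the maximum to 262144 bytes yields
 *
 *	sb_max_adj = 262144 * 2048 / (256 + 2048) = 233016
 *
 * so the enforced limit on queued payload is scaled down by the worst-case
 * ratio of payload to mbuf-plus-cluster storage.
 */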
/*
 * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
 * become limiting if buffering efficiency is near the normal case.
 */
int
sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	rlim_t sbsize_limit;

	SOCKBUF_LOCK_ASSERT(sb);

	/*
	 * td will only be NULL when we're in an interrupt (e.g. in
	 * tcp_input()).
	 *
	 * XXXRW: This comment needs updating, as might the code.
	 */
	if (cc > sb_max_adj)
		return (0);
	if (td != NULL) {
		PROC_LOCK(td->td_proc);
		sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
		PROC_UNLOCK(td->td_proc);
	} else
		sbsize_limit = RLIM_INFINITY;
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (0);
	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
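/*
 * Worked example of the scaling above: reserving cc = 65536 bytes with the
 * default sb_efficiency of 8 sets sb_mbmax = min(65536 * 8, sb_max), i.e.
 * 524288 bytes of mbuf storage, so mbcnt only becomes the limiting factor
 * when mbufs average less than 1/8 full of payload.
 */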
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
    struct thread *td)
{
	int error;

	SOCKBUF_LOCK(sb);
	error = sbreserve_locked(sb, cc, so, td);
	SOCKBUF_UNLOCK(sb);
	return (error);
}
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
static void
sbrelease_internal(struct sockbuf *sb, struct socket *so)
{

	sbflush_internal(sb);
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}

void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}

void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}
/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendcontrol() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and then removed from the
 * socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
 */
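/*
 * Example (hypothetical send path, following the description above): the
 * caller compares sbspace() against the amount of data to be queued before
 * appending:
 *
 *	SOCKBUF_LOCK(&so->so_snd);
 *	if (sbspace(&so->so_snd) < (long)len) {
 *		SOCKBUF_UNLOCK(&so->so_snd);
 *		... wait for space or fail ...
 *	} else {
 *		sbappend_locked(&so->so_snd, m);
 *		SOCKBUF_UNLOCK(&so->so_snd);
 *	}
 */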
#ifdef SOCKBUF_DEBUG
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
			__func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("%s from %s:%u", __func__, file, line);
	}
}

void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
			__func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("%s from %s:%u", __func__, file, line);
	}
}
#endif /* SOCKBUF_DEBUG */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated with the mbuf chain is recorded in sb.  Empty
 * mbufs are discarded and mbufs are compacted where possible.
 */
void
sbappend_locked(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m == 0)
		return;

	SBLASTRECORDCHK(sb);
	n = sb->sb_mb;
	if (n) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		if ((n = sb->sb_lastrecord) != NULL) {
			do {
				if (n->m_flags & M_EOR) {
					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
					return;
				}
			} while (n->m_next && (n = n->m_next));
		} else {
			/*
			 * If this is the first record in the socket buffer,
			 * it's also the last record.
			 */
			sb->sb_lastrecord = m;
		}
	}
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb);
}
/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated with the mbuf chain is recorded in sb.  Empty
 * mbufs are discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK_ASSERT(sb);

	KASSERT(m->m_nextpkt == NULL, ("sbappendstream 0"));
	KASSERT(sb->sb_mb == sb->sb_lastrecord, ("sbappendstream 1"));

	SBLASTMBUFCHK(sb);

	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb);
}
/*
 * This version of sbappend() should only be used when the caller absolutely
 * knows that there will never be more than one record in the socket buffer,
 * that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}
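/*
 * Example (stream case): TCP queues outbound user data with
 *
 *	sbappendstream(&so->so_snd, m);
 *
 * and because a stream buffer holds exactly one record, the chain is
 * compressed directly onto sb_mbtail instead of being linked in as a new
 * record.
 */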
#ifdef SOCKBUF_DEBUG
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	struct mbuf *n = 0;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif
/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == 0)
		return;
	m = sb->sb_mb;
	if (m)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.  Note this permits zero length
	 * records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above, except the mbuf chain begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
/*
 * Append address and data, and optionally, control (ancillary) data to the
 * receive queue of a socket.  If present, m0 must include a packet header
 * with total length.  Returns 0 if no space in sockbuf or insufficient
 * mbufs.
 */
int
sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	space += m_length(control, &n);

	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}
/*
 * Append address and data, and optionally, control (ancillary) data to the
 * receive queue of a socket.  If present, m0 must include a packet header
 * with total length.  Returns 0 if no space in sockbuf or insufficient
 * mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
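/*
 * Example (hypothetical datagram input path): deliver a packet and the
 * sender's address, discarding everything if the buffer lacks space:
 *
 *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from, m,
 *	    control) == 0) {
 *		m_freem(m);
 *		if (control != NULL)
 *			m_freem(control);
 *	} else
 *		sorwakeup(so);
 */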
int
sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *mlast;
	int space;

	SOCKBUF_LOCK_ASSERT(sb);

	if (control == 0)
		panic("sbappendcontrol_locked");
	space = m_length(control, &n) + m_length(m0, NULL);

	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb);

	for (m = control; m->m_next; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	int retval;

	SOCKBUF_LOCK(sb);
	retval = sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (retval);
}
/*
 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 * (n).  If (n) is NULL, the buffer is presumed empty.
 *
 * When the data is compressed, mbufs in the chain may be handled in one of
 * several ways:
 *
 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 *     record boundary, and no change in data type).
 *
 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 *     an mbuf already in the socket buffer.  This can occur if an
 *     appropriate mbuf exists, there is room, and no merging of data types
 *     will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		eor |= m->m_flags & M_EOR;
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}
/*
 * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
 */
static void
sbflush_internal(struct sockbuf *sb)
{

	while (sb->sb_mbcnt) {
		/*
		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		sbdrop_internal(sb, (int)sb->sb_cc);
	}
	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
		panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
}

void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}

void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
/*
 * Drop data from (the front of) a sockbuf.
 */
static void
sbdrop_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		m = m_free(m);
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		m = m_free(m);
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}
}
/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbdrop_internal(sb, len);
}

void
sbdrop(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK(sb);
	sbdrop_locked(sb, len);
	SOCKBUF_UNLOCK(sb);
}
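/*
 * Example (hypothetical reliable protocol, per the comment at the top of
 * this section): when the peer acknowledges acked_bytes of data, the sender
 * discards them from the front of the send buffer and wakes any writers
 * waiting for space:
 *
 *	sbdrop(&so->so_snd, acked_bytes);
 *	sowwakeup(so);
 */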
/*
 * Maintain a pointer and offset pair into the socket buffer mbuf chain to
 * avoid traversal of the entire socket buffer for larger offsets.
 */
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
{
	struct mbuf *m, *ret;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
	KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));

	/*
	 * Is off below stored offset? Happens on retransmits.
	 * Just return, we can't help here.
	 */
	if (sb->sb_sndptroff > off) {
		*moff = off;
		return (sb->sb_mb);
	}

	/* Return closest mbuf in chain for current offset. */
	*moff = off - sb->sb_sndptroff;
	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;

	/* Advance by len to be as close as possible for the next transmit. */
	for (off = off - sb->sb_sndptroff + len - 1;
	     off > 0 && m != NULL && off >= m->m_len;
	     m = m->m_next) {
		sb->sb_sndptroff += m->m_len;
		off -= m->m_len;
	}
	if (off > 0 && m == NULL)
		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
	sb->sb_sndptr = m;

	return (ret);
}
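/*
 * Worked example: if sb_sndptr caches the mbuf beginning at byte offset
 * sb_sndptroff = 4096, a call with off = 4596 and len = 512 returns that
 * cached mbuf with *moff = 500 instead of walking the chain from sb_mb,
 * and advances the cache past the bytes just sent; a retransmit with
 * off < 4096 simply falls back to sb_mb with *moff = off.
 */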
/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord_locked(struct sockbuf *sb)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			m = m_free(m);
		} while (m);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
/*
 * Create a "control" mbuf containing the specified data with the specified
 * type for presentation on a socket buffer.
 */
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	if (CMSG_SPACE((u_int)size) > MCLBYTES)
		return ((struct mbuf *) NULL);
	if (CMSG_SPACE((u_int)size) > MLEN)
		m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
	else
		m = m_get(M_DONTWAIT, MT_CONTROL);
	if (m == NULL)
		return ((struct mbuf *) NULL);
	cp = mtod(m, struct cmsghdr *);
	m->m_len = 0;
	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
	    ("sbcreatecontrol: short mbuf"));
	if (p != NULL)
		(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}
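/*
 * Example (hypothetical): a protocol attaching a receive timestamp to a
 * datagram builds the ancillary record with
 *
 *	control = sbcreatecontrol((caddr_t)&tv, sizeof(struct timeval),
 *	    SCM_TIMESTAMP, SOL_SOCKET);
 *
 * and hands it to sbappendaddr() together with the data mbufs.
 */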
/*
 * This does the same for socket buffers that sotoxsocket does for sockets:
 * generate a user-format data structure describing the socket buffer.  Note
 * that the xsockbuf structure, since it is always embedded in a socket, does
 * not include a self pointer nor a length.  We make this entry point public
 * in case some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{

	xsb->sb_cc = sb->sb_cc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mcnt = sb->sb_mcnt;
	xsb->sb_ccnt = sb->sb_ccnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = sb->sb_timeo;
}
/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
    &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");