1 /* $NetBSD: kttcp.c,v 1.23 2007/02/09 21:55:26 ad Exp $ */
4 * Copyright (c) 2002 Wasabi Systems, Inc.
7 * Written by Frank van der Linden and Jason R. Thorpe for
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project by
21 * Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 * or promote products derived from this software without specific prior
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
40 * kttcp.c -- provides kernel support for network testing,
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: kttcp.c,v 1.23 2007/02/09 21:55:26 ad Exp $");
47 #include <sys/param.h>
48 #include <sys/types.h>
49 #include <sys/ioctl.h>
51 #include <sys/filedesc.h>
53 #include <sys/systm.h>
54 #include <sys/protosw.h>
56 #include <sys/resourcevar.h>
57 #include <sys/signal.h>
58 #include <sys/socketvar.h>
59 #include <sys/socket.h>
61 #include <sys/mount.h>
62 #include <sys/syscallargs.h>
64 #include <dev/kttcpio.h>
/*
 * Forward declarations.
 *
 * kttcp_send() / kttcp_recv() take the calling lwp and the ioctl argument
 * block (struct kttcp_io_args).  kttcp_sosend() / kttcp_soreceive() take a
 * socket, a requested byte count, a pointer through which the byte count
 * actually handled is returned, the calling lwp, and flags (by value for
 * send, by pointer for receive).  All four are file-local (static).
 *
 * kttcpattach() and the ioctl handler kttcpioctl() are the externally
 * visible entry points declared here.
 */
66 static int kttcp_send(struct lwp
*l
, struct kttcp_io_args
*);
67 static int kttcp_recv(struct lwp
*l
, struct kttcp_io_args
*);
68 static int kttcp_sosend(struct socket
*, unsigned long long,
69 unsigned long long *, struct lwp
*, int);
70 static int kttcp_soreceive(struct socket
*, unsigned long long,
71 unsigned long long *, struct lwp
*, int *);
73 void kttcpattach(int);
75 dev_type_ioctl(kttcpioctl
);
/*
 * Character-device switch for kttcp.  kttcpioctl is the only
 * driver-specific handler in the table; every other entry is one of the
 * generic null / no-op stubs (nullopen, nullclose, noread, nowrite, nostop,
 * notty, nopoll, nommap, nokqfilter), followed by the D_OTHER type flag.
 */
77 const struct cdevsw kttcp_cdevsw
= {
78 nullopen
, nullclose
, noread
, nowrite
, kttcpioctl
,
79 nostop
, notty
, nopoll
, nommap
, nokqfilter
, D_OTHER
/*
 * kttcpattach: attach hook taking an instance count.
 * NOTE(review): the body is not visible in this listing -- confirm against
 * the full file whether `count` is used at all.
 */
83 kttcpattach(int count
)
/*
 * kttcpioctl: ioctl entry point of the kttcp device (the only
 * driver-specific handler listed in kttcp_cdevsw).
 *
 * The visible code tests FWRITE in `flag`, then hands `data`, cast to
 * struct kttcp_io_args *, to kttcp_send() or kttcp_recv() and keeps the
 * result in `error`.
 *
 * NOTE(review): this listing has gaps.  The tail of the parameter list (the
 * `l` passed on is presumably a trailing struct lwp * parameter), the action
 * taken when FWRITE is clear, and the command values selecting send versus
 * receive are not visible here -- confirm against the full file.
 */
89 kttcpioctl(dev_t dev
, u_long cmd
, void *data
, int flag
,
/* Test for write access on the descriptor used for the ioctl. */
94 if ((flag
& FWRITE
) == 0)
/* Transmit request: `data` is the caller's struct kttcp_io_args. */
99 error
= kttcp_send(l
, (struct kttcp_io_args
*) data
);
/* Receive request: same argument block, opposite direction. */
103 error
= kttcp_recv(l
, (struct kttcp_io_args
*) data
);
/*
 * kttcp_send: transmit kio->kio_totalsize bytes on the socket named by
 * kio->kio_socket, on behalf of lwp `l`.
 *
 * Visible steps:
 *  - compare kio_totalsize against KTTCP_MAX_XMIT;
 *  - look the descriptor up in the calling process's file table with
 *    fd_getfile() and check that it is a DTYPE_SOCKET;
 *  - call kttcp_sosend() on the socket (fp->f_data) in a loop that repeats
 *    while no error occurred and `len` is still above zero;
 *  - report the elapsed time (t1 - t0) in kio_elapsed and the byte count
 *    (kio_totalsize - len) in kio_bytesdone.
 *
 * NOTE(review): this listing has gaps.  The statements run on each failed
 * check, where t0/t1 are sampled, how `len` is decremented inside the loop,
 * and the return value are not visible -- confirm against the full file.
 */
114 kttcp_send(struct lwp
*l
, struct kttcp_io_args
*kio
)
118 struct timeval t0
, t1
;
119 unsigned long long len
, done
;
/*
 * Size limit check.
 * NOTE(review): this uses `>=` while kttcp_recv() uses `>` against the same
 * KTTCP_MAX_XMIT limit -- confirm which bound is intended.
 */
121 if (kio
->kio_totalsize
>= KTTCP_MAX_XMIT
)
/* Resolve the user-supplied descriptor number in this process's fd table. */
124 fp
= fd_getfile(l
->l_proc
->p_fd
, kio
->kio_socket
);
/* Only sockets are acceptable targets. */
128 if (fp
->f_type
!= DTYPE_SOCKET
) {
/* `len` tracks the bytes still to be sent. */
133 len
= kio
->kio_totalsize
;
136 error
= kttcp_sosend((struct socket
*)fp
->f_data
, len
,
/* Repeat until an error occurs or nothing is left to send. */
139 } while (error
== 0 && len
> 0);
/* Elapsed time for the whole transfer goes back in kio_elapsed. */
146 timersub(&t1
, &t0
, &kio
->kio_elapsed
);
/* Bytes transferred = requested total minus what is still outstanding. */
148 kio
->kio_bytesdone
= kio
->kio_totalsize
- len
;
/*
 * kttcp_recv: receive up to kio->kio_totalsize bytes from the socket named
 * by kio->kio_socket, on behalf of lwp `l`.
 *
 * Visible steps:
 *  - compare kio_totalsize against KTTCP_MAX_XMIT;
 *  - look the descriptor up with fd_getfile() and check that it is a
 *    DTYPE_SOCKET;
 *  - call kttcp_soreceive() on the socket (fp->f_data) in a loop that
 *    repeats while no error occurred, `len` is above zero, and the previous
 *    call reported progress (done > 0);
 *  - report the elapsed time (t1 - t0) in kio_elapsed and the byte count
 *    (kio_totalsize - len) in kio_bytesdone.
 *
 * NOTE(review): this listing has gaps.  The statements run on each failed
 * check, where t0/t1 are sampled, how `len` is decremented inside the loop,
 * and the return value are not visible -- confirm against the full file.
 */
154 kttcp_recv(struct lwp
*l
, struct kttcp_io_args
*kio
)
158 struct timeval t0
, t1
;
159 unsigned long long len
, done
;
/* Pre-set only to quiet a compiler warning (see the XXX marker). */
161 done
= 0; /* XXX gcc */
/*
 * Size limit check.
 * NOTE(review): this uses `>` while kttcp_send() uses `>=` against the same
 * KTTCP_MAX_XMIT limit -- confirm which bound is intended.
 */
163 if (kio
->kio_totalsize
> KTTCP_MAX_XMIT
)
/* Resolve the user-supplied descriptor number in this process's fd table. */
166 fp
= fd_getfile(l
->l_proc
->p_fd
, kio
->kio_socket
);
/* Only sockets are acceptable sources. */
170 if (fp
->f_type
!= DTYPE_SOCKET
) {
/* `len` tracks the bytes still expected. */
174 len
= kio
->kio_totalsize
;
/*
 * NOTE(review): the flags pointer is passed as NULL, while
 * kttcp_soreceive() contains a visible `flags = *flagsp &~ MSG_EOR;`.
 * Any NULL guard around that dereference is not visible in this listing --
 * confirm it exists.
 */
177 error
= kttcp_soreceive((struct socket
*)fp
->f_data
,
178 len
, &done
, l
, NULL
);
/* Stop on error, when everything arrived, or when a call made no progress. */
180 } while (error
== 0 && len
> 0 && done
> 0);
/* Elapsed time for the whole transfer goes back in kio_elapsed. */
189 timersub(&t1
, &t0
, &kio
->kio_elapsed
);
/* Bytes transferred = requested total minus what is still outstanding. */
191 kio
->kio_bytesdone
= kio
->kio_totalsize
- len
;
/*
 * SBLOCKWAIT(f): turn message flags into the wait argument handed to
 * sblock() -- M_NOWAIT when MSG_DONTWAIT is set in `f`, M_WAITOK otherwise.
 */
196 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
199 * Slightly changed version of sosend()
/*
 * kttcp_sosend: send path used by kttcp_send(); per the comment preceding
 * this function it is a slightly changed version of sosend().
 *
 * Arguments, as used by the visible code:
 *   so    - socket whose send buffer (so_snd) is locked, waited on and fed
 *   slen  - number of bytes requested
 *   done  - out: set to slen - resid
 *   l     - calling lwp; its process's ru_msgsnd counter is incremented
 *   flags - MSG_* flags; MSG_DONTROUTE, MSG_OOB and (through SBLOCKWAIT)
 *           MSG_DONTWAIT are tested
 *
 * Visible flow: lock so_snd with sblock(); check socket state
 * (SS_CANTSENDMORE, so_error, connection state); wait with sbwait() when
 * there is not enough buffer space; allocate mbufs with m_gethdr()/m_get()
 * and size each chunk with lmin(); hand the chain to the protocol's
 * pr_usrreq as PRU_SEND or PRU_SENDOOB; repeat while resid is non-zero and
 * space remains; unlock and report the byte count through *done.
 *
 * NOTE(review): this listing has gaps.  The declaration and initialisation
 * of `resid`, the error exits, the `release`/`out` labels used by snderr,
 * and the return statement are not visible.  No step that fills the mbufs
 * with caller data is visible either -- confirm against the full file.
 */
202 kttcp_sosend(struct socket
*so
, unsigned long long slen
,
203 unsigned long long *done
, struct lwp
*l
, int flags
)
205 struct mbuf
**mp
, *m
, *top
;
206 long space
, len
, mlen
;
207 int error
, s
, dontroute
, atomic
;
/* Set when the protocol requires each send to be handed over in one piece. */
210 atomic
= sosendallatonce(so
);
214 * In theory resid should be unsigned.
215 * However, space must be signed, as it might be less than 0
216 * if we over-committed, and we must use a signed comparison
217 * of space and resid. On the other hand, a negative resid
218 * causes us to loop sending 0-length segments to the protocol.
225 (flags
& MSG_DONTROUTE
) && (so
->so_options
& SO_DONTROUTE
) == 0 &&
226 (so
->so_proto
->pr_flags
& PR_ATOMIC
);
227 /* WRS XXX - are we doing per-lwp or per-proc stats? */
228 l
->l_proc
->p_stats
->p_ru
.ru_msgsnd
++;
/*
 * snderr(): record an error code, restore the interrupt level saved in `s`,
 * and jump to the `release` label.
 * NOTE(review): the macro is a bare brace block rather than
 * do { ... } while (0), so `snderr(x);` directly before an `else` would not
 * parse as intended.
 */
229 #define snderr(errno) { error = errno; splx(s); goto release; }
/* Take the send-buffer lock; SBLOCKWAIT decides whether we may sleep. */
232 if ((error
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
))) != 0)
236 if (so
->so_state
& SS_CANTSENDMORE
)
239 error
= so
->so_error
;
244 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
245 if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
246 if ((so
->so_state
& SS_ISCONFIRMING
) == 0)
249 snderr(EDESTADDRREQ
);
/* Free room currently available in the send buffer. */
251 space
= sbspace(&so
->so_snd
);
254 if ((atomic
&& resid
> so
->so_snd
.sb_hiwat
))
/*
 * Not enough room: either fail (non-blocking socket) or drop the lock and
 * sleep in sbwait() until the send buffer drains.
 */
256 if (space
< resid
&& (atomic
|| space
< so
->so_snd
.sb_lowat
)) {
257 if (so
->so_state
& SS_NBIO
)
/*
 * NOTE(review): these consistency checks inspect so_rcv and are labelled
 * "kttcp_soreceive" although this is the send path waiting on so_snd --
 * looks copy-pasted from the receive side; confirm.
 */
259 SBLASTRECORDCHK(&so
->so_rcv
,
260 "kttcp_soreceive sbwait 1");
261 SBLASTMBUFCHK(&so
->so_rcv
,
262 "kttcp_soreceive sbwait 1");
263 sbunlock(&so
->so_snd
);
264 error
= sbwait(&so
->so_snd
);
/* First mbuf of a chain carries a packet header ... */
275 m
= m_gethdr(M_WAIT
, MT_DATA
);
278 m
->m_pkthdr
.rcvif
= NULL
;
/* ... later ones are plain data mbufs. */
280 m
= m_get(M_WAIT
, MT_DATA
);
283 if (resid
>= MINCLSIZE
&& space
>= MCLBYTES
) {
285 if ((m
->m_flags
& M_EXT
) == 0)
289 len
= lmin(MCLBYTES
, resid
);
291 if (atomic
&& top
== 0) {
292 len
= lmin(MCLBYTES
- max_hdr
,
294 m
->m_data
+= max_hdr
;
296 len
= lmin(MCLBYTES
, resid
);
/* Chunk length is bounded by mbuf room, remaining bytes and buffer space. */
301 len
= lmin(lmin(mlen
, resid
), space
);
304 * For datagram protocols, leave room
305 * for protocol headers in first mbuf.
307 if (atomic
&& top
== 0 && len
< mlen
)
313 top
->m_pkthdr
.len
+= len
;
319 top
->m_flags
|= M_EOR
;
322 } while (space
> 0 && atomic
);
326 if (so
->so_state
& SS_CANTSENDMORE
)
/* SO_DONTROUTE and SS_MORETOCOME are set only around the protocol call. */
330 so
->so_options
|= SO_DONTROUTE
;
332 so
->so_state
|= SS_MORETOCOME
;
/* Hand the chain to the protocol, as out-of-band data if MSG_OOB is set. */
333 error
= (*so
->so_proto
->pr_usrreq
)(so
,
334 (flags
& MSG_OOB
) ? PRU_SENDOOB
: PRU_SEND
,
337 so
->so_options
&= ~SO_DONTROUTE
;
339 so
->so_state
&= ~SS_MORETOCOME
;
346 } while (resid
&& space
> 0);
350 sbunlock(&so
->so_snd
);
/* Report how many of the requested bytes were consumed. */
354 *done
= slen
- resid
;
356 printf("sosend: error %d slen %llu resid %lld\n", error
, slen
, resid
);
362 kttcp_soreceive(struct socket
*so
, unsigned long long slen
,
363 unsigned long long *done
, struct lwp
*l
, int *flagsp
)
365 struct mbuf
*m
, **mp
;
366 int flags
, len
, error
, s
, offset
, moff
, type
;
367 long long orig_resid
, resid
;
368 const struct protosw
*pr
;
369 struct mbuf
*nextrecord
;
374 resid
= orig_resid
= slen
;
376 flags
= *flagsp
&~ MSG_EOR
;
379 if (flags
& MSG_OOB
) {
380 m
= m_get(M_WAIT
, MT_DATA
);
381 error
= (*pr
->pr_usrreq
)(so
, PRU_RCVOOB
, m
,
382 (struct mbuf
*)(long)(flags
& MSG_PEEK
), NULL
, NULL
);
386 resid
-= min(resid
, m
->m_len
);
388 } while (resid
&& error
== 0 && m
);
396 if (so
->so_state
& SS_ISCONFIRMING
&& resid
)
397 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
, NULL
, NULL
, NULL
);
400 if ((error
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
))) != 0)
404 m
= so
->so_rcv
.sb_mb
;
406 * If we have less data than requested, block awaiting more
407 * (subject to any timeout) if:
408 * 1. the current count is less than the low water mark,
409 * 2. MSG_WAITALL is set, and it is possible to do the entire
410 * receive operation at once if we block (resid <= hiwat), or
411 * 3. MSG_DONTWAIT is not set.
412 * If MSG_WAITALL is set but resid is larger than the receive buffer,
413 * we have to do the receive in sections, and thus risk returning
414 * a short count if a timeout or signal occurs after we start.
416 if (m
== NULL
|| (((flags
& MSG_DONTWAIT
) == 0 &&
417 so
->so_rcv
.sb_cc
< resid
) &&
418 (so
->so_rcv
.sb_cc
< so
->so_rcv
.sb_lowat
||
419 ((flags
& MSG_WAITALL
) && resid
<= so
->so_rcv
.sb_hiwat
)) &&
420 m
->m_nextpkt
== NULL
&& (pr
->pr_flags
& PR_ATOMIC
) == 0)) {
422 if (m
== NULL
&& so
->so_rcv
.sb_cc
)
428 error
= so
->so_error
;
429 if ((flags
& MSG_PEEK
) == 0)
433 if (so
->so_state
& SS_CANTRCVMORE
) {
439 for (; m
; m
= m
->m_next
)
440 if (m
->m_type
== MT_OOBDATA
|| (m
->m_flags
& M_EOR
)) {
441 m
= so
->so_rcv
.sb_mb
;
444 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 &&
445 (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)) {
451 if ((so
->so_state
& SS_NBIO
) || (flags
& MSG_DONTWAIT
)) {
455 sbunlock(&so
->so_rcv
);
456 error
= sbwait(&so
->so_rcv
);
464 * On entry here, m points to the first record of the socket buffer.
465 * While we process the initial mbufs containing address and control
466 * info, we save a copy of m->m_nextpkt into nextrecord.
468 #ifdef notyet /* XXXX */
470 uio
->uio_lwp
->l_proc
->p_stats
->p_ru
.ru_msgrcv
++;
472 KASSERT(m
== so
->so_rcv
.sb_mb
);
473 SBLASTRECORDCHK(&so
->so_rcv
, "kttcp_soreceive 1");
474 SBLASTMBUFCHK(&so
->so_rcv
, "kttcp_soreceive 1");
475 nextrecord
= m
->m_nextpkt
;
476 if (pr
->pr_flags
& PR_ADDR
) {
478 if (m
->m_type
!= MT_SONAME
)
482 if (flags
& MSG_PEEK
) {
485 sbfree(&so
->so_rcv
, m
);
486 MFREE(m
, so
->so_rcv
.sb_mb
);
487 m
= so
->so_rcv
.sb_mb
;
490 while (m
&& m
->m_type
== MT_CONTROL
&& error
== 0) {
491 if (flags
& MSG_PEEK
) {
494 sbfree(&so
->so_rcv
, m
);
495 MFREE(m
, so
->so_rcv
.sb_mb
);
496 m
= so
->so_rcv
.sb_mb
;
501 * If m is non-NULL, we have some data to read. From now on,
502 * make sure to keep sb_lastrecord consistent when working on
503 * the last packet on the chain (nextrecord == NULL) and we
504 * change m->m_nextpkt.
507 if ((flags
& MSG_PEEK
) == 0) {
508 m
->m_nextpkt
= nextrecord
;
510 * If nextrecord == NULL (this is a single chain),
511 * then sb_lastrecord may not be valid here if m
512 * was changed earlier.
514 if (nextrecord
== NULL
) {
515 KASSERT(so
->so_rcv
.sb_mb
== m
);
516 so
->so_rcv
.sb_lastrecord
= m
;
520 if (type
== MT_OOBDATA
)
523 if ((flags
& MSG_PEEK
) == 0) {
524 KASSERT(so
->so_rcv
.sb_mb
== m
);
525 so
->so_rcv
.sb_mb
= nextrecord
;
526 SB_EMPTY_FIXUP(&so
->so_rcv
);
529 SBLASTRECORDCHK(&so
->so_rcv
, "kttcp_soreceive 2");
530 SBLASTMBUFCHK(&so
->so_rcv
, "kttcp_soreceive 2");
534 while (m
&& resid
> 0 && error
== 0) {
535 if (m
->m_type
== MT_OOBDATA
) {
536 if (type
!= MT_OOBDATA
)
538 } else if (type
== MT_OOBDATA
)
541 else if (m
->m_type
!= MT_DATA
&& m
->m_type
!= MT_HEADER
)
544 so
->so_state
&= ~SS_RCVATMARK
;
546 if (so
->so_oobmark
&& len
> so
->so_oobmark
- offset
)
547 len
= so
->so_oobmark
- offset
;
548 if (len
> m
->m_len
- moff
)
549 len
= m
->m_len
- moff
;
551 * If mp is set, just pass back the mbufs.
552 * Otherwise copy them out via the uio, then free.
553 * Sockbuf must be consistent here (points to current mbuf,
554 * it points to next record) when we drop priority;
555 * we must note any additions to the sockbuf when we
556 * block interrupts again.
559 if (len
== m
->m_len
- moff
) {
560 if (m
->m_flags
& M_EOR
)
562 if (flags
& MSG_PEEK
) {
566 nextrecord
= m
->m_nextpkt
;
567 sbfree(&so
->so_rcv
, m
);
571 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
574 MFREE(m
, so
->so_rcv
.sb_mb
);
575 m
= so
->so_rcv
.sb_mb
;
578 * If m != NULL, we also know that
579 * so->so_rcv.sb_mb != NULL.
581 KASSERT(so
->so_rcv
.sb_mb
== m
);
583 m
->m_nextpkt
= nextrecord
;
584 if (nextrecord
== NULL
)
585 so
->so_rcv
.sb_lastrecord
= m
;
587 so
->so_rcv
.sb_mb
= nextrecord
;
588 SB_EMPTY_FIXUP(&so
->so_rcv
);
590 SBLASTRECORDCHK(&so
->so_rcv
,
591 "kttcp_soreceive 3");
592 SBLASTMBUFCHK(&so
->so_rcv
,
593 "kttcp_soreceive 3");
596 if (flags
& MSG_PEEK
)
600 *mp
= m_copym(m
, 0, len
, M_WAIT
);
603 so
->so_rcv
.sb_cc
-= len
;
606 if (so
->so_oobmark
) {
607 if ((flags
& MSG_PEEK
) == 0) {
608 so
->so_oobmark
-= len
;
609 if (so
->so_oobmark
== 0) {
610 so
->so_state
|= SS_RCVATMARK
;
615 if (offset
== so
->so_oobmark
)
622 * If the MSG_WAITALL flag is set (for non-atomic socket),
623 * we must not quit until "uio->uio_resid == 0" or an error
624 * termination. If a signal/timeout occurs, return
625 * with a short count but without error.
626 * Keep sockbuf locked against other readers.
628 while (flags
& MSG_WAITALL
&& m
== NULL
&& resid
> 0 &&
629 !sosendallatonce(so
) && !nextrecord
) {
630 if (so
->so_error
|| so
->so_state
& SS_CANTRCVMORE
)
633 * If we are peeking and the socket receive buffer is
634 * full, stop since we can't get more data to peek at.
636 if ((flags
& MSG_PEEK
) && sbspace(&so
->so_rcv
) <= 0)
639 * If we've drained the socket buffer, tell the
640 * protocol in case it needs to do something to
641 * get it filled again.
643 if ((pr
->pr_flags
& PR_WANTRCVD
) && so
->so_pcb
)
644 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
,
645 (struct mbuf
*)(long)flags
, NULL
, NULL
);
646 SBLASTRECORDCHK(&so
->so_rcv
,
647 "kttcp_soreceive sbwait 2");
648 SBLASTMBUFCHK(&so
->so_rcv
,
649 "kttcp_soreceive sbwait 2");
650 error
= sbwait(&so
->so_rcv
);
652 sbunlock(&so
->so_rcv
);
656 if ((m
= so
->so_rcv
.sb_mb
) != NULL
)
657 nextrecord
= m
->m_nextpkt
;
661 if (m
&& pr
->pr_flags
& PR_ATOMIC
) {
663 if ((flags
& MSG_PEEK
) == 0)
664 (void) sbdroprecord(&so
->so_rcv
);
666 if ((flags
& MSG_PEEK
) == 0) {
669 * First part is an SB_EMPTY_FIXUP(). Second part
670 * makes sure sb_lastrecord is up-to-date if
671 * there is still data in the socket buffer.
673 so
->so_rcv
.sb_mb
= nextrecord
;
674 if (so
->so_rcv
.sb_mb
== NULL
) {
675 so
->so_rcv
.sb_mbtail
= NULL
;
676 so
->so_rcv
.sb_lastrecord
= NULL
;
677 } else if (nextrecord
->m_nextpkt
== NULL
)
678 so
->so_rcv
.sb_lastrecord
= nextrecord
;
680 SBLASTRECORDCHK(&so
->so_rcv
, "kttcp_soreceive 4");
681 SBLASTMBUFCHK(&so
->so_rcv
, "kttcp_soreceive 4");
682 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
)
683 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
,
684 (struct mbuf
*)(long)flags
, NULL
, NULL
);
686 if (orig_resid
== resid
&& orig_resid
&&
687 (flags
& MSG_EOR
) == 0 && (so
->so_state
& SS_CANTRCVMORE
) == 0) {
688 sbunlock(&so
->so_rcv
);
696 sbunlock(&so
->so_rcv
);
698 *done
= slen
- resid
;
700 printf("soreceive: error %d slen %llu resid %lld\n", error
, slen
, resid
);