4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
36 #include <sys/vnode.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
43 #include <sys/termios.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/sunddi.h>
47 #include <sys/esunddi.h>
48 #include <sys/flock.h>
49 #include <sys/modctl.h>
50 #include <sys/cmn_err.h>
51 #include <sys/vmsystm.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <fs/sockfs/sockcommon.h>
56 #include <fs/sockfs/socktpi.h>
58 #include <netinet/in.h>
59 #include <sys/sendfile.h>
61 #include <sys/tihdr.h>
62 #include <sys/atomic.h>
64 #include <inet/common.h>
69 extern int sosendfile64(file_t
*, file_t
*, const struct ksendfilevec64
*,
71 extern int nl7c_sendfilev(struct sonode
*, u_offset_t
*, struct sendfilevec
*,
73 extern int snf_segmap(file_t
*, vnode_t
*, u_offset_t
, u_offset_t
, ssize_t
*,
75 extern sotpi_info_t
*sotpi_sototpi(struct sonode
*);
77 #define SEND_MAX_CHUNK 16
79 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
81 * 64 bit offsets for 32 bit applications only running either on
82 * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
83 * more than 2GB of data.
86 sendvec_chunk64(file_t
*fp
, u_offset_t
*fileoff
, struct ksendfilevec64
*sfv
,
87 int copy_cnt
, ssize32_t
*count
)
102 for (i
= 0; i
< copy_cnt
; i
++) {
104 if (ISSIG(curthread
, JUSTLOOKING
))
108 * Do similar checks as "write" as we are writing
109 * sfv_len bytes into "vp".
111 sfv_len
= (ssize32_t
)sfv
->sfv_len
;
121 if (vp
->v_type
== VREG
) {
122 if (*fileoff
>= curproc
->p_fsz_ctl
) {
123 mutex_enter(&curproc
->p_lock
);
125 rctlproc_legacy
[RLIMIT_FSIZE
],
126 curproc
->p_rctls
, curproc
, RCA_SAFE
);
127 mutex_exit(&curproc
->p_lock
);
131 if (*fileoff
>= OFFSET_MAX(fp
))
134 if (*fileoff
+ sfv_len
> OFFSET_MAX(fp
))
138 tmpcount
= *count
+ sfv_len
;
142 sfv_off
= sfv
->sfv_off
;
144 auio
.uio_extflg
= UIO_COPY_DEFAULT
;
145 if (sfv
->sfv_fd
== SFV_FD_SELF
) {
146 aiov
.iov_len
= sfv_len
;
147 aiov
.iov_base
= (caddr_t
)(uintptr_t)sfv_off
;
148 auio
.uio_loffset
= *fileoff
;
150 auio
.uio_resid
= sfv_len
;
151 auio
.uio_iov
= &aiov
;
152 auio
.uio_segflg
= UIO_USERSPACE
;
153 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
154 auio
.uio_fmode
= fflag
;
155 ioflag
= auio
.uio_fmode
& (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
156 while (sfv_len
> 0) {
157 error
= VOP_WRITE(vp
, &auio
, ioflag
,
159 cnt
= sfv_len
- auio
.uio_resid
;
161 ttolwp(curthread
)->lwp_ru
.ioch
+= (ulong_t
)cnt
;
162 if (vp
->v_type
== VREG
)
174 if ((ffp
= getf(sfv
->sfv_fd
)) == NULL
)
177 if ((ffp
->f_flag
& FREAD
) == 0) {
178 releasef(sfv
->sfv_fd
);
182 readvp
= ffp
->f_vnode
;
183 if (readvp
->v_type
!= VREG
) {
184 releasef(sfv
->sfv_fd
);
189 * No point reading and writing to same vp,
190 * as long as both are regular files. readvp is not
191 * locked; but since we got it from an open file the
192 * contents will be valid during the time of access.
194 if (vn_compare(vp
, readvp
)) {
195 releasef(sfv
->sfv_fd
);
200 * Optimize the regular file over
203 if (vp
->v_type
== VSOCK
) {
204 error
= sosendfile64(fp
, ffp
, sfv
,
214 * Note: we assume readvp != vp. "vp" is already
215 * locked, and "readvp" must not be.
218 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
219 (void) VOP_RWLOCK(readvp
, V_WRITELOCK_FALSE
,
221 (void) VOP_RWLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
223 (void) VOP_RWLOCK(readvp
, V_WRITELOCK_FALSE
,
228 * Same checks as in pread64.
230 if (sfv_off
> MAXOFFSET_T
) {
231 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
232 releasef(sfv
->sfv_fd
);
236 if (sfv_off
+ sfv_len
> MAXOFFSET_T
)
237 sfv_len
= (ssize32_t
)(MAXOFFSET_T
- sfv_off
);
239 /* Find the native blocksize to transfer data */
240 size
= MIN(vp
->v_vfsp
->vfs_bsize
,
241 readvp
->v_vfsp
->vfs_bsize
);
242 size
= sfv_len
< size
? sfv_len
: size
;
243 ptr
= kmem_alloc(size
, KM_NOSLEEP
);
245 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
246 releasef(sfv
->sfv_fd
);
250 while (sfv_len
> 0) {
253 iov_len
= MIN(size
, sfv_len
);
255 aiov
.iov_len
= iov_len
;
256 auio
.uio_loffset
= sfv_off
;
257 auio
.uio_iov
= &aiov
;
259 auio
.uio_resid
= iov_len
;
260 auio
.uio_segflg
= UIO_SYSSPACE
;
261 auio
.uio_llimit
= MAXOFFSET_T
;
262 auio
.uio_fmode
= ffp
->f_flag
;
263 ioflag
= auio
.uio_fmode
&
264 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
267 * If read sync is not asked for,
270 if ((ioflag
& FRSYNC
) == 0)
271 ioflag
&= ~(FSYNC
|FDSYNC
);
272 error
= VOP_READ(readvp
, &auio
, ioflag
,
275 kmem_free(ptr
, size
);
276 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
278 releasef(sfv
->sfv_fd
);
283 * Check how much data was really read.
284 * Decrement the 'len' and increment the
285 * 'off' appropriately.
287 cnt
= iov_len
- auio
.uio_resid
;
290 * If we were reading a pipe (currently
291 * not implemented), we may now lose
294 kmem_free(ptr
, size
);
295 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
297 releasef(sfv
->sfv_fd
);
305 auio
.uio_loffset
= *fileoff
;
306 auio
.uio_iov
= &aiov
;
308 auio
.uio_resid
= cnt
;
309 auio
.uio_segflg
= UIO_SYSSPACE
;
310 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
311 auio
.uio_fmode
= fflag
;
312 ioflag
= auio
.uio_fmode
&
313 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
314 error
= VOP_WRITE(vp
, &auio
, ioflag
,
318 * Check how much data was written. Increment
319 * the 'len' and decrement the 'off' if all
320 * the data was not written.
322 cnt
-= auio
.uio_resid
;
323 sfv_len
+= auio
.uio_resid
;
324 sfv_off
-= auio
.uio_resid
;
325 ttolwp(curthread
)->lwp_ru
.ioch
+= (ulong_t
)cnt
;
326 if (vp
->v_type
== VREG
)
330 kmem_free(ptr
, size
);
331 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
333 releasef(sfv
->sfv_fd
);
337 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
338 releasef(sfv
->sfv_fd
);
339 kmem_free(ptr
, size
);
347 sendvec64(file_t
*fp
, const struct ksendfilevec64
*vec
, int sfvcnt
,
348 size32_t
*xferred
, int fildes
)
352 const struct ksendfilevec64
*copy_vec
;
353 struct ksendfilevec64 sfv
[SEND_MAX_CHUNK
];
359 (void) VOP_RWLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
362 fileoff
= fp
->f_offset
;
365 copy_cnt
= MIN(sfvcnt
, SEND_MAX_CHUNK
);
366 if (copyin(copy_vec
, sfv
, copy_cnt
*
367 sizeof (struct ksendfilevec64
))) {
372 error
= sendvec_chunk64(fp
, &fileoff
, sfv
, copy_cnt
, &count
);
376 copy_vec
+= copy_cnt
;
378 } while (sfvcnt
> 0);
380 if (vp
->v_type
== VREG
)
381 fp
->f_offset
+= count
;
383 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
384 if (copyout(&count
, xferred
, sizeof (count
)))
388 return (set_errno(error
));
394 sendvec_small_chunk(file_t
*fp
, u_offset_t
*fileoff
, struct sendfilevec
*sfv
,
395 int copy_cnt
, ssize_t total_size
, int maxblk
, ssize_t
*count
)
406 #ifdef _SYSCALL32_IMPL
407 model_t model
= get_udatamodel();
408 u_offset_t maxoff
= (model
== DATAMODEL_ILP32
) ?
409 MAXOFF32_T
: MAXOFFSET_T
;
411 const u_offset_t maxoff
= MAXOFF32_T
;
418 size_t size
= total_size
;
426 ASSERT(vp
->v_type
== VSOCK
);
429 /* If nothing to send, return */
433 if (vp
->v_stream
!= NULL
) {
434 wroff
= (int)vp
->v_stream
->sd_wroff
;
435 tail_len
= (int)vp
->v_stream
->sd_tail
;
440 wroff
= so
->so_proto_props
.sopp_wroff
;
441 tail_len
= so
->so_proto_props
.sopp_tail
;
444 extra
= wroff
+ tail_len
;
446 buf_left
= MIN(total_size
, maxblk
);
447 head
= dmp
= allocb(buf_left
+ extra
, BPRI_HI
);
450 head
->b_wptr
= head
->b_rptr
= head
->b_rptr
+ wroff
;
451 bzero(&msg
, sizeof (msg
));
453 auio
.uio_extflg
= UIO_COPY_DEFAULT
;
454 for (i
= 0; i
< copy_cnt
; i
++) {
455 if (ISSIG(curthread
, JUSTLOOKING
)) {
461 * Do similar checks as "write" as we are writing
462 * sfv_len bytes into "vp".
464 sfv_len
= (ssize_t
)sfv
->sfv_len
;
471 /* Check for overflow */
472 #ifdef _SYSCALL32_IMPL
473 if (model
== DATAMODEL_ILP32
) {
474 if (((ssize32_t
)(*count
+ sfv_len
)) < 0) {
480 if ((*count
+ sfv_len
) < 0) {
485 sfv_off
= (u_offset_t
)(ulong_t
)sfv
->sfv_off
;
487 if (sfv
->sfv_fd
== SFV_FD_SELF
) {
488 while (sfv_len
> 0) {
491 buf_left
= MIN(total_size
, maxblk
);
492 iov_len
= MIN(buf_left
, sfv_len
);
493 dmp
= allocb(buf_left
+ extra
, BPRI_HI
);
498 dmp
->b_wptr
= dmp
->b_rptr
=
502 iov_len
= MIN(buf_left
, sfv_len
);
505 aiov
.iov_len
= iov_len
;
506 aiov
.iov_base
= (caddr_t
)(uintptr_t)sfv_off
;
507 auio
.uio_loffset
= *fileoff
;
509 auio
.uio_resid
= iov_len
;
510 auio
.uio_iov
= &aiov
;
511 auio
.uio_segflg
= UIO_USERSPACE
;
512 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
513 auio
.uio_fmode
= fflag
;
516 total_size
-= iov_len
;
520 error
= uiomove((caddr_t
)dmp
->b_wptr
,
521 iov_len
, UIO_WRITE
, &auio
);
526 dmp
->b_wptr
+= iov_len
;
532 if ((ffp
= getf(sfv
->sfv_fd
)) == NULL
) {
537 if ((ffp
->f_flag
& FREAD
) == 0) {
538 releasef(sfv
->sfv_fd
);
543 readvp
= ffp
->f_vnode
;
544 if (readvp
->v_type
!= VREG
) {
545 releasef(sfv
->sfv_fd
);
551 * No point reading and writing to same vp,
552 * as long as both are regular files. readvp is not
553 * locked; but since we got it from an open file the
554 * contents will be valid during the time of access.
557 if (vn_compare(vp
, readvp
)) {
558 releasef(sfv
->sfv_fd
);
564 * Note: we assume readvp != vp. "vp" is already
565 * locked, and "readvp" must not be.
569 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
570 (void) VOP_RWLOCK(readvp
, V_WRITELOCK_FALSE
,
572 (void) VOP_RWLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
574 (void) VOP_RWLOCK(readvp
, V_WRITELOCK_FALSE
,
578 /* Same checks as in pread */
579 if (sfv_off
> maxoff
) {
580 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
581 releasef(sfv
->sfv_fd
);
585 if (sfv_off
+ sfv_len
> maxoff
) {
586 total_size
-= (sfv_off
+ sfv_len
- maxoff
);
587 sfv_len
= (ssize_t
)((offset_t
)maxoff
-
591 while (sfv_len
> 0) {
594 buf_left
= MIN(total_size
, maxblk
);
595 iov_len
= MIN(buf_left
, sfv_len
);
596 dmp
= allocb(buf_left
+ extra
, BPRI_HI
);
599 V_WRITELOCK_FALSE
, NULL
);
600 releasef(sfv
->sfv_fd
);
604 dmp
->b_wptr
= dmp
->b_rptr
=
608 iov_len
= MIN(buf_left
, sfv_len
);
610 aiov
.iov_base
= (caddr_t
)dmp
->b_wptr
;
611 aiov
.iov_len
= iov_len
;
612 auio
.uio_loffset
= sfv_off
;
613 auio
.uio_iov
= &aiov
;
615 auio
.uio_resid
= iov_len
;
616 auio
.uio_segflg
= UIO_SYSSPACE
;
617 auio
.uio_llimit
= MAXOFFSET_T
;
618 auio
.uio_fmode
= ffp
->f_flag
;
619 ioflag
= auio
.uio_fmode
&
620 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
623 * If read sync is not asked for,
626 if ((ioflag
& FRSYNC
) == 0)
627 ioflag
&= ~(FSYNC
|FDSYNC
);
628 error
= VOP_READ(readvp
, &auio
, ioflag
,
632 * If we were reading a pipe (currently
633 * not implemented), we may now lose
636 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
638 releasef(sfv
->sfv_fd
);
644 * Check how much data was really read.
645 * Decrement the 'len' and increment the
646 * 'off' appropriately.
648 cnt
= iov_len
- auio
.uio_resid
;
650 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
652 releasef(sfv
->sfv_fd
);
663 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
664 releasef(sfv
->sfv_fd
);
669 ASSERT(total_size
== 0);
670 error
= socket_sendmblk(VTOSO(vp
), &msg
, fflag
, CRED(), &head
);
676 ttolwp(curthread
)->lwp_ru
.ioch
+= (ulong_t
)size
;
684 sendvec_chunk(file_t
*fp
, u_offset_t
*fileoff
, struct sendfilevec
*sfv
,
685 int copy_cnt
, ssize_t
*count
)
696 #ifdef _SYSCALL32_IMPL
697 model_t model
= get_udatamodel();
698 u_offset_t maxoff
= (model
== DATAMODEL_ILP32
) ?
699 MAXOFF32_T
: MAXOFFSET_T
;
701 const u_offset_t maxoff
= MAXOFF32_T
;
706 int maxblk
, wroff
, tail_len
;
714 if (vp
->v_type
== VSOCK
) {
716 if (vp
->v_stream
!= NULL
) {
718 wroff
= (int)stp
->sd_wroff
;
719 tail_len
= (int)stp
->sd_tail
;
720 maxblk
= (int)stp
->sd_maxblk
;
723 wroff
= so
->so_proto_props
.sopp_wroff
;
724 tail_len
= so
->so_proto_props
.sopp_tail
;
725 maxblk
= so
->so_proto_props
.sopp_maxblk
;
727 extra
= wroff
+ tail_len
;
730 bzero(&msg
, sizeof (msg
));
731 auio
.uio_extflg
= UIO_COPY_DEFAULT
;
732 for (i
= 0; i
< copy_cnt
; i
++) {
733 if (ISSIG(curthread
, JUSTLOOKING
))
737 * Do similar checks as "write" as we are writing
738 * sfv_len bytes into "vp".
740 sfv_len
= (ssize_t
)sfv
->sfv_len
;
747 if (vp
->v_type
== VREG
) {
748 if (*fileoff
>= curproc
->p_fsz_ctl
) {
749 mutex_enter(&curproc
->p_lock
);
751 rctlproc_legacy
[RLIMIT_FSIZE
],
752 curproc
->p_rctls
, curproc
, RCA_SAFE
);
753 mutex_exit(&curproc
->p_lock
);
758 if (*fileoff
>= maxoff
)
761 if (*fileoff
+ sfv_len
> maxoff
)
765 /* Check for overflow */
766 #ifdef _SYSCALL32_IMPL
767 if (model
== DATAMODEL_ILP32
) {
768 if (((ssize32_t
)(*count
+ sfv_len
)) < 0)
772 if ((*count
+ sfv_len
) < 0)
775 sfv_off
= (u_offset_t
)(ulong_t
)sfv
->sfv_off
;
777 if (sfv
->sfv_fd
== SFV_FD_SELF
) {
778 if (vp
->v_type
== VSOCK
) {
779 while (sfv_len
> 0) {
784 * Socket filters can limit the mblk
785 * size, so limit reads to maxblk if
786 * there are filters present.
788 if (so
->so_filter_active
> 0 &&
790 iov_len
= MIN(iov_len
, maxblk
);
792 aiov
.iov_len
= iov_len
;
794 (caddr_t
)(uintptr_t)sfv_off
;
796 auio
.uio_iov
= &aiov
;
798 auio
.uio_loffset
= *fileoff
;
799 auio
.uio_segflg
= UIO_USERSPACE
;
800 auio
.uio_fmode
= fflag
;
801 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
802 auio
.uio_resid
= iov_len
;
804 dmp
= allocb(iov_len
+ extra
, BPRI_HI
);
807 dmp
->b_wptr
= dmp
->b_rptr
=
809 error
= uiomove((caddr_t
)dmp
->b_wptr
,
810 iov_len
, UIO_WRITE
, &auio
);
815 dmp
->b_wptr
+= iov_len
;
816 error
= socket_sendmblk(VTOSO(vp
),
817 &msg
, fflag
, CRED(), &dmp
);
824 ttolwp(curthread
)->lwp_ru
.ioch
+=
831 aiov
.iov_len
= sfv_len
;
832 aiov
.iov_base
= (caddr_t
)(uintptr_t)sfv_off
;
834 auio
.uio_iov
= &aiov
;
836 auio
.uio_loffset
= *fileoff
;
837 auio
.uio_segflg
= UIO_USERSPACE
;
838 auio
.uio_fmode
= fflag
;
839 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
840 auio
.uio_resid
= sfv_len
;
842 ioflag
= auio
.uio_fmode
&
843 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
844 while (sfv_len
> 0) {
845 error
= VOP_WRITE(vp
, &auio
, ioflag
,
847 cnt
= sfv_len
- auio
.uio_resid
;
849 ttolwp(curthread
)->lwp_ru
.ioch
+=
861 struct vnode
*realvp
;
865 if ((ffp
= getf(sfv
->sfv_fd
)) == NULL
)
868 if ((ffp
->f_flag
& FREAD
) == 0) {
869 releasef(sfv
->sfv_fd
);
873 readvp
= ffp
->f_vnode
;
874 if (VOP_REALVP(readvp
, &realvp
, NULL
) == 0)
876 if (readvp
->v_type
!= VREG
) {
877 releasef(sfv
->sfv_fd
);
882 * No point reading and writing to same vp,
883 * as long as both are regular files. readvp is not
884 * locked; but since we got it from an open file the
885 * contents will be valid during the time of access.
887 if (vn_compare(vp
, readvp
)) {
888 releasef(sfv
->sfv_fd
);
893 * Note: we assume readvp != vp. "vp" is already
894 * locked, and "readvp" must not be.
897 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
898 (void) VOP_RWLOCK(readvp
, V_WRITELOCK_FALSE
,
900 (void) VOP_RWLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
902 (void) VOP_RWLOCK(readvp
, V_WRITELOCK_FALSE
,
906 /* Same checks as in pread */
907 if (sfv_off
> maxoff
) {
908 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
909 releasef(sfv
->sfv_fd
);
912 if (sfv_off
+ sfv_len
> maxoff
) {
913 sfv_len
= (ssize_t
)((offset_t
)maxoff
-
916 /* Find the native blocksize to transfer data */
917 size
= MIN(vp
->v_vfsp
->vfs_bsize
,
918 readvp
->v_vfsp
->vfs_bsize
);
919 size
= sfv_len
< size
? sfv_len
: size
;
921 if (vp
->v_type
!= VSOCK
) {
923 buf
= kmem_alloc(size
, KM_NOSLEEP
);
925 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
927 releasef(sfv
->sfv_fd
);
933 copyflag
= stp
!= NULL
? stp
->sd_copyflag
:
934 so
->so_proto_props
.sopp_zcopyflag
;
937 * Socket filters can limit the mblk size,
938 * so limit reads to maxblk if there are
941 if (so
->so_filter_active
> 0 &&
943 size
= MIN(size
, maxblk
);
945 if (vn_has_flocks(readvp
) ||
946 readvp
->v_flag
& VNOMAP
||
947 copyflag
& STZCVMUNSAFE
) {
949 } else if (copyflag
& STZCVMSAFE
) {
953 if (socket_setsockopt(VTOSO(vp
),
954 SOL_SOCKET
, SO_SND_COPYAVOID
,
955 &on
, sizeof (on
), CRED()) == 0)
963 nowait
= (sfv
->sfv_flag
& SFV_NOWAIT
) != 0;
964 error
= snf_segmap(fp
, readvp
, sfv_off
,
965 (u_offset_t
)sfv_len
, (ssize_t
*)&cnt
,
967 releasef(sfv
->sfv_fd
);
975 while (sfv_len
> 0) {
978 iov_len
= MIN(size
, sfv_len
);
980 if (vp
->v_type
== VSOCK
) {
981 dmp
= allocb(iov_len
+ extra
, BPRI_HI
);
984 V_WRITELOCK_FALSE
, NULL
);
985 releasef(sfv
->sfv_fd
);
988 dmp
->b_wptr
= dmp
->b_rptr
=
990 ptr
= (caddr_t
)dmp
->b_rptr
;
996 aiov
.iov_len
= iov_len
;
997 auio
.uio_loffset
= sfv_off
;
998 auio
.uio_iov
= &aiov
;
1000 auio
.uio_resid
= iov_len
;
1001 auio
.uio_segflg
= UIO_SYSSPACE
;
1002 auio
.uio_llimit
= MAXOFFSET_T
;
1003 auio
.uio_fmode
= ffp
->f_flag
;
1004 ioflag
= auio
.uio_fmode
&
1005 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
1008 * If read sync is not asked for,
1011 if ((ioflag
& FRSYNC
) == 0)
1012 ioflag
&= ~(FSYNC
|FDSYNC
);
1013 error
= VOP_READ(readvp
, &auio
, ioflag
,
1017 * If we were reading a pipe (currently
1018 * not implemented), we may now lose
1021 if (vp
->v_type
== VSOCK
)
1024 kmem_free(buf
, size
);
1025 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
1027 releasef(sfv
->sfv_fd
);
1032 * Check how much data was really read.
1033 * Decrement the 'len' and increment the
1034 * 'off' appropriately.
1036 cnt
= iov_len
- auio
.uio_resid
;
1038 if (vp
->v_type
== VSOCK
)
1041 kmem_free(buf
, size
);
1042 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
,
1044 releasef(sfv
->sfv_fd
);
1050 if (vp
->v_type
== VSOCK
) {
1051 dmp
->b_wptr
= dmp
->b_rptr
+ cnt
;
1053 error
= socket_sendmblk(VTOSO(vp
),
1054 &msg
, fflag
, CRED(), &dmp
);
1059 VOP_RWUNLOCK(readvp
,
1060 V_WRITELOCK_FALSE
, NULL
);
1061 releasef(sfv
->sfv_fd
);
1065 ttolwp(curthread
)->lwp_ru
.ioch
+=
1070 aiov
.iov_base
= ptr
;
1072 auio
.uio_loffset
= *fileoff
;
1073 auio
.uio_resid
= cnt
;
1074 auio
.uio_iov
= &aiov
;
1075 auio
.uio_iovcnt
= 1;
1076 auio
.uio_segflg
= UIO_SYSSPACE
;
1077 auio
.uio_llimit
= curproc
->p_fsz_ctl
;
1078 auio
.uio_fmode
= fflag
;
1079 ioflag
= auio
.uio_fmode
&
1080 (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
1081 error
= VOP_WRITE(vp
, &auio
, ioflag
,
1085 * Check how much data was written.
1086 * Increment the 'len' and decrement the
1087 * 'off' if all the data was not
1090 cnt
-= auio
.uio_resid
;
1091 sfv_len
+= auio
.uio_resid
;
1092 sfv_off
-= auio
.uio_resid
;
1093 ttolwp(curthread
)->lwp_ru
.ioch
+=
1098 kmem_free(buf
, size
);
1099 VOP_RWUNLOCK(readvp
,
1100 V_WRITELOCK_FALSE
, NULL
);
1101 releasef(sfv
->sfv_fd
);
1107 kmem_free(buf
, size
);
1110 VOP_RWUNLOCK(readvp
, V_WRITELOCK_FALSE
, NULL
);
1111 releasef(sfv
->sfv_fd
);
1119 sendfilev(int opcode
, int fildes
, const struct sendfilevec
*vec
, int sfvcnt
,
1123 int first_vector_error
= 0;
1129 const struct sendfilevec
*copy_vec
;
1130 struct sendfilevec sfv
[SEND_MAX_CHUNK
];
1132 #ifdef _SYSCALL32_IMPL
1133 struct ksendfilevec32 sfv32
[SEND_MAX_CHUNK
];
1137 boolean_t is_sock
= B_FALSE
;
1141 return (set_errno(EINVAL
));
1143 if ((fp
= getf(fildes
)) == NULL
)
1144 return (set_errno(EBADF
));
1146 if (((fp
->f_flag
) & FWRITE
) == 0) {
1151 fileoff
= fp
->f_offset
;
1154 switch (vp
->v_type
) {
1158 if (SOCK_IS_NONSTR(so
)) {
1159 maxblk
= so
->so_proto_props
.sopp_maxblk
;
1161 maxblk
= (int)vp
->v_stream
->sd_maxblk
;
1174 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1176 return (sendvec64(fp
, (struct ksendfilevec64
*)vec
, sfvcnt
,
1177 (size32_t
*)xferred
, fildes
));
1184 (void) VOP_RWLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
1189 copy_cnt
= MIN(sfvcnt
, SEND_MAX_CHUNK
);
1190 #ifdef _SYSCALL32_IMPL
1191 /* 32-bit callers need to have their iovec expanded. */
1192 if (get_udatamodel() == DATAMODEL_ILP32
) {
1193 if (copyin(copy_vec
, sfv32
,
1194 copy_cnt
* sizeof (ksendfilevec32_t
))) {
1199 for (i
= 0; i
< copy_cnt
; i
++) {
1200 sfv
[i
].sfv_fd
= sfv32
[i
].sfv_fd
;
1202 (off_t
)(uint32_t)sfv32
[i
].sfv_off
;
1203 sfv
[i
].sfv_len
= (size_t)sfv32
[i
].sfv_len
;
1204 total_size
+= sfv
[i
].sfv_len
;
1205 sfv
[i
].sfv_flag
= sfv32
[i
].sfv_flag
;
1207 * Individual elements of the vector must not
1208 * wrap or overflow, as later math is signed.
1209 * Equally total_size needs to be checked after
1210 * each vector is added in, to be sure that
1211 * rogue values haven't overflowed the counter.
1213 if (((ssize32_t
)sfv
[i
].sfv_len
< 0) ||
1214 ((ssize32_t
)total_size
< 0)) {
1216 * Truncate the vector to send data
1217 * described by elements before the
1221 first_vector_error
= EINVAL
;
1222 /* total_size can't be trusted */
1223 if ((ssize32_t
)total_size
< 0)
1228 /* Nothing to do, process errors */
1234 if (copyin(copy_vec
, sfv
,
1235 copy_cnt
* sizeof (sendfilevec_t
))) {
1240 for (i
= 0; i
< copy_cnt
; i
++) {
1241 total_size
+= sfv
[i
].sfv_len
;
1243 * Individual elements of the vector must not
1244 * wrap or overflow, as later math is signed.
1245 * Equally total_size needs to be checked after
1246 * each vector is added in, to be sure that
1247 * rogue values haven't overflowed the counter.
1249 if (((ssize_t
)sfv
[i
].sfv_len
< 0) ||
1252 * Truncate the vector to send data
1253 * described by elements before the
1257 first_vector_error
= EINVAL
;
1258 /* total_size can't be trusted */
1264 /* Nothing to do, process errors */
1267 #ifdef _SYSCALL32_IMPL
1272 * The choice between sendvec_small_chunk
1273 * and sendvec_chunk is dependent on multiple things:
1275 * i) latency is important for smaller files. So if the
1276 * data is smaller than 'tcp_slow_start_initial' times
1277 * maxblk, then use sendvec_small_chunk which creates
1278 * maxblk size mblks and chains them together and sends
1279 * them to TCP in one shot. It also leaves 'wroff' size
1280 * space for the headers in each mblk.
1282 * ii) for total size bigger than 'tcp_slow_start_initial'
1283 * times maxblk, it's probably real file data which is
1284 * dominating. So it's better to use sendvec_chunk because
1285 * performance suffers badly if we don't do pagesize reads.
1286 * sendvec_chunk will do pagesize reads and write them
1287 * in pagesize mblks to TCP.
1289 * Side Notes: A write to file has not been optimized.
1290 * Future zero copy code will plug into sendvec_chunk
1291 * only because doing zero copy for files smaller than
1292 * pagesize is useless.
1294 * Note, if socket has NL7C enabled then call NL7C's
1295 * sendfilev() function to consume the sfv[].
1298 if (!SOCK_IS_NONSTR(so
) &&
1299 _SOTOTPI(so
)->sti_nl7c_flags
!= 0) {
1300 error
= nl7c_sendfilev(so
, &fileoff
,
1301 sfv
, copy_cnt
, &count
);
1302 } else if ((total_size
<= (4 * maxblk
)) &&
1304 error
= sendvec_small_chunk(fp
,
1305 &fileoff
, sfv
, copy_cnt
,
1306 total_size
, maxblk
, &count
);
1308 error
= sendvec_chunk(fp
, &fileoff
,
1309 sfv
, copy_cnt
, &count
);
1312 ASSERT(vp
->v_type
== VREG
);
1313 error
= sendvec_chunk(fp
, &fileoff
, sfv
, copy_cnt
,
1318 #ifdef _SYSCALL32_IMPL
1319 if (get_udatamodel() == DATAMODEL_ILP32
) {
1320 copy_vec
= (const struct sendfilevec
*)
1322 (copy_cnt
* sizeof (ksendfilevec32_t
)));
1325 copy_vec
+= copy_cnt
;
1328 /* Process all vector members up to first error */
1329 } while ((sfvcnt
> 0) && first_vector_error
== 0 && error
== 0);
1331 if (vp
->v_type
== VREG
)
1332 fp
->f_offset
+= count
;
1334 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
1336 #ifdef _SYSCALL32_IMPL
1337 if (get_udatamodel() == DATAMODEL_ILP32
) {
1338 ssize32_t count32
= (ssize32_t
)count
;
1339 if (copyout(&count32
, xferred
, sizeof (count32
)))
1343 return (set_errno(error
));
1344 if (first_vector_error
!= 0)
1345 return (set_errno(first_vector_error
));
1349 if (copyout(&count
, xferred
, sizeof (count
)))
1353 return (set_errno(error
));
1354 if (first_vector_error
!= 0)
1355 return (set_errno(first_vector_error
));
1360 return (set_errno(error
));