Merge commit '4ec4134be29a3b00791f6d70074168a6a3ff4fb3'
[unleashed.git] / kernel / syscall / rw.c
blob05cc6d949bbedaee1bcc56e12ab843adc148160c
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
36 #include <sys/param.h>
37 #include <sys/isa_defs.h>
38 #include <sys/types.h>
39 #include <sys/inttypes.h>
40 #include <sys/sysmacros.h>
41 #include <sys/cred.h>
42 #include <sys/user.h>
43 #include <sys/systm.h>
44 #include <sys/errno.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/proc.h>
48 #include <sys/cpuvar.h>
49 #include <sys/uio.h>
50 #include <sys/debug.h>
51 #include <sys/rctl.h>
52 #include <sys/nbmlock.h>
53 #include <sys/limits.h>
/*
 * Largest request size, in bytes, for which the read paths in this file
 * select UIO_COPY_CACHED instead of UIO_COPY_DEFAULT.
 */
#define	COPYOUT_MAX_CACHE	(128 * 1024)	/* 128K */

/* Run-time copy of the threshold; global so it's patchable. */
size_t copyout_max_cached = COPYOUT_MAX_CACHE;
/*
 * read, write, pread, pwrite, readv, and writev syscalls.
 *
 * 64-bit open:	all opens are large file opens.
 * Large Files: the behaviour of read depends on whether the fd
 *		corresponds to a large open or not.
 * 32-bit open:	FOFFMAX flag not set.
 *		read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
 *		EOVERFLOW if count is non-zero and if size of file
 *		is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
 *		at >= MAXOFF32_T returns EOF.
 */
73 * Native system call
75 ssize_t
76 read(int fdes, void *cbuf, size_t count)
78 struct uio auio;
79 struct iovec aiov;
80 file_t *fp;
81 register vnode_t *vp;
82 struct cpu *cp;
83 int fflag, ioflag, rwflag;
84 ssize_t cnt, bcount;
85 int error = 0;
86 uoff_t fileoff;
87 int in_crit = 0;
89 if ((cnt = (ssize_t)count) < 0)
90 return (set_errno(EINVAL));
91 if ((fp = getf(fdes)) == NULL)
92 return (set_errno(EBADF));
93 if (((fflag = fp->f_flag) & FREAD) == 0) {
94 error = EBADF;
95 goto out;
97 vp = fp->f_vnode;
99 if (vp->v_type == VREG && cnt == 0) {
100 goto out;
103 rwflag = 0;
104 aiov.iov_base = cbuf;
105 aiov.iov_len = cnt;
108 * We have to enter the critical region before calling fop_rwlock
109 * to avoid a deadlock with write() calls.
111 if (nbl_need_check(vp)) {
112 int svmand;
114 nbl_start_crit(vp, RW_READER);
115 in_crit = 1;
116 error = nbl_svmand(vp, fp->f_cred, &svmand);
117 if (error != 0)
118 goto out;
119 if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
120 NULL)) {
121 error = EACCES;
122 goto out;
126 (void) fop_rwlock(vp, rwflag, NULL);
129 * We do the following checks inside fop_rwlock so as to
130 * prevent file size from changing while these checks are
131 * being done. Also, we load fp's offset to the local
132 * variable fileoff because we can have a parallel lseek
133 * going on (f_offset is not protected by any lock) which
134 * could change f_offset. We need to see the value only
135 * once here and take a decision. Seeing it more than once
136 * can lead to incorrect functionality.
139 fileoff = (uoff_t)fp->f_offset;
140 if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
141 struct vattr va;
142 va.va_mask = AT_SIZE;
143 if ((error = fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
144 fop_rwunlock(vp, rwflag, NULL);
145 goto out;
147 if (fileoff >= va.va_size) {
148 cnt = 0;
149 fop_rwunlock(vp, rwflag, NULL);
150 goto out;
151 } else {
152 error = EOVERFLOW;
153 fop_rwunlock(vp, rwflag, NULL);
154 goto out;
157 if ((vp->v_type == VREG) &&
158 (fileoff + cnt > OFFSET_MAX(fp))) {
159 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
161 auio.uio_loffset = fileoff;
162 auio.uio_iov = &aiov;
163 auio.uio_iovcnt = 1;
164 auio.uio_resid = bcount = cnt;
165 auio.uio_segflg = UIO_USERSPACE;
166 auio.uio_llimit = MAXOFFSET_T;
167 auio.uio_fmode = fflag;
169 * Only use bypass caches when the count is large enough
171 if (bcount <= copyout_max_cached)
172 auio.uio_extflg = UIO_COPY_CACHED;
173 else
174 auio.uio_extflg = UIO_COPY_DEFAULT;
176 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
178 /* If read sync is not asked for, filter sync flags */
179 if ((ioflag & FRSYNC) == 0)
180 ioflag &= ~(FSYNC|FDSYNC);
181 error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
182 cnt -= auio.uio_resid;
183 CPU_STATS_ENTER_K();
184 cp = CPU;
185 CPU_STATS_ADDQ(cp, sys, sysread, 1);
186 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
187 CPU_STATS_EXIT_K();
188 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
190 if (vp->v_type == VFIFO) /* Backward compatibility */
191 fp->f_offset = cnt;
192 else if (((fp->f_flag & FAPPEND) == 0) ||
193 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */
194 fp->f_offset = auio.uio_loffset;
195 fop_rwunlock(vp, rwflag, NULL);
197 if (error == EINTR && cnt != 0)
198 error = 0;
199 out:
200 if (in_crit)
201 nbl_end_crit(vp);
202 releasef(fdes);
203 if (error)
204 return (set_errno(error));
205 return (cnt);
209 * Native system call
211 ssize_t
212 write(int fdes, void *cbuf, size_t count)
214 struct uio auio;
215 struct iovec aiov;
216 file_t *fp;
217 register vnode_t *vp;
218 struct cpu *cp;
219 int fflag, ioflag, rwflag;
220 ssize_t cnt, bcount;
221 int error = 0;
222 uoff_t fileoff;
223 int in_crit = 0;
225 if ((cnt = (ssize_t)count) < 0)
226 return (set_errno(EINVAL));
227 if ((fp = getf(fdes)) == NULL)
228 return (set_errno(EBADF));
229 if (((fflag = fp->f_flag) & FWRITE) == 0) {
230 error = EBADF;
231 goto out;
233 vp = fp->f_vnode;
235 if (vp->v_type == VREG && cnt == 0) {
236 goto out;
239 rwflag = 1;
240 aiov.iov_base = cbuf;
241 aiov.iov_len = cnt;
244 * We have to enter the critical region before calling fop_rwlock
245 * to avoid a deadlock with ufs.
247 if (nbl_need_check(vp)) {
248 int svmand;
250 nbl_start_crit(vp, RW_READER);
251 in_crit = 1;
252 error = nbl_svmand(vp, fp->f_cred, &svmand);
253 if (error != 0)
254 goto out;
255 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
256 NULL)) {
257 error = EACCES;
258 goto out;
262 (void) fop_rwlock(vp, rwflag, NULL);
264 fileoff = fp->f_offset;
265 if (vp->v_type == VREG) {
268 * We raise psignal if write for >0 bytes causes
269 * it to exceed the ulimit.
271 if (fileoff >= curproc->p_fsz_ctl) {
272 fop_rwunlock(vp, rwflag, NULL);
274 mutex_enter(&curproc->p_lock);
275 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
276 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
277 mutex_exit(&curproc->p_lock);
279 error = EFBIG;
280 goto out;
283 * We return EFBIG if write is done at an offset
284 * greater than the offset maximum for this file structure.
287 if (fileoff >= OFFSET_MAX(fp)) {
288 fop_rwunlock(vp, rwflag, NULL);
289 error = EFBIG;
290 goto out;
293 * Limit the bytes to be written upto offset maximum for
294 * this open file structure.
296 if (fileoff + cnt > OFFSET_MAX(fp))
297 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
299 auio.uio_loffset = fileoff;
300 auio.uio_iov = &aiov;
301 auio.uio_iovcnt = 1;
302 auio.uio_resid = bcount = cnt;
303 auio.uio_segflg = UIO_USERSPACE;
304 auio.uio_llimit = curproc->p_fsz_ctl;
305 auio.uio_fmode = fflag;
306 auio.uio_extflg = UIO_COPY_DEFAULT;
308 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
310 error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
311 cnt -= auio.uio_resid;
312 CPU_STATS_ENTER_K();
313 cp = CPU;
314 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
315 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
316 CPU_STATS_EXIT_K();
317 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
319 if (vp->v_type == VFIFO) /* Backward compatibility */
320 fp->f_offset = cnt;
321 else if (((fp->f_flag & FAPPEND) == 0) ||
322 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */
323 fp->f_offset = auio.uio_loffset;
324 fop_rwunlock(vp, rwflag, NULL);
326 if (error == EINTR && cnt != 0)
327 error = 0;
328 out:
329 if (in_crit)
330 nbl_end_crit(vp);
331 releasef(fdes);
332 if (error)
333 return (set_errno(error));
334 return (cnt);
337 ssize_t
338 pread(int fdes, void *cbuf, size_t count, off_t offset)
340 struct uio auio;
341 struct iovec aiov;
342 file_t *fp;
343 register vnode_t *vp;
344 struct cpu *cp;
345 int fflag, ioflag, rwflag;
346 ssize_t bcount;
347 int error = 0;
348 uoff_t fileoff = (uoff_t)(ulong_t)offset;
349 #ifdef _SYSCALL32_IMPL
350 uoff_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
351 MAXOFF32_T : MAXOFFSET_T;
352 #else
353 const uoff_t maxoff = MAXOFF32_T;
354 #endif
355 int in_crit = 0;
357 if ((bcount = (ssize_t)count) < 0)
358 return (set_errno(EINVAL));
360 if ((fp = getf(fdes)) == NULL)
361 return (set_errno(EBADF));
362 if (((fflag = fp->f_flag) & (FREAD)) == 0) {
363 error = EBADF;
364 goto out;
367 rwflag = 0;
368 vp = fp->f_vnode;
370 if (vp->v_type == VREG) {
372 if (bcount == 0)
373 goto out;
376 * Return EINVAL if an invalid offset comes to pread.
377 * Negative offset from user will cause this error.
380 if (fileoff > maxoff) {
381 error = EINVAL;
382 goto out;
385 * Limit offset such that we don't read or write
386 * a file beyond the maximum offset representable in
387 * an off_t structure.
389 if (fileoff + bcount > maxoff)
390 bcount = (ssize_t)((offset_t)maxoff - fileoff);
391 } else if (vp->v_type == VFIFO) {
392 error = ESPIPE;
393 goto out;
397 * We have to enter the critical region before calling fop_rwlock
398 * to avoid a deadlock with ufs.
400 if (nbl_need_check(vp)) {
401 int svmand;
403 nbl_start_crit(vp, RW_READER);
404 in_crit = 1;
405 error = nbl_svmand(vp, fp->f_cred, &svmand);
406 if (error != 0)
407 goto out;
408 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
409 NULL)) {
410 error = EACCES;
411 goto out;
415 aiov.iov_base = cbuf;
416 aiov.iov_len = bcount;
417 (void) fop_rwlock(vp, rwflag, NULL);
418 if (vp->v_type == VREG && fileoff == (uoff_t)maxoff) {
419 struct vattr va;
420 va.va_mask = AT_SIZE;
421 if ((error = fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
422 fop_rwunlock(vp, rwflag, NULL);
423 goto out;
425 fop_rwunlock(vp, rwflag, NULL);
428 * We have to return EOF if fileoff is >= file size.
430 if (fileoff >= va.va_size) {
431 bcount = 0;
432 goto out;
436 * File is greater than or equal to maxoff and therefore
437 * we return EOVERFLOW.
439 error = EOVERFLOW;
440 goto out;
442 auio.uio_loffset = fileoff;
443 auio.uio_iov = &aiov;
444 auio.uio_iovcnt = 1;
445 auio.uio_resid = bcount;
446 auio.uio_segflg = UIO_USERSPACE;
447 auio.uio_llimit = MAXOFFSET_T;
448 auio.uio_fmode = fflag;
449 auio.uio_extflg = UIO_COPY_CACHED;
451 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
453 /* If read sync is not asked for, filter sync flags */
454 if ((ioflag & FRSYNC) == 0)
455 ioflag &= ~(FSYNC|FDSYNC);
456 error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
457 bcount -= auio.uio_resid;
458 CPU_STATS_ENTER_K();
459 cp = CPU;
460 CPU_STATS_ADDQ(cp, sys, sysread, 1);
461 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
462 CPU_STATS_EXIT_K();
463 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
464 fop_rwunlock(vp, rwflag, NULL);
466 if (error == EINTR && bcount != 0)
467 error = 0;
468 out:
469 if (in_crit)
470 nbl_end_crit(vp);
471 releasef(fdes);
472 if (error)
473 return (set_errno(error));
474 return (bcount);
477 ssize_t
478 pwrite(int fdes, void *cbuf, size_t count, off_t offset)
480 struct uio auio;
481 struct iovec aiov;
482 file_t *fp;
483 register vnode_t *vp;
484 struct cpu *cp;
485 int fflag, ioflag, rwflag;
486 ssize_t bcount;
487 int error = 0;
488 uoff_t fileoff = (uoff_t)(ulong_t)offset;
489 #ifdef _SYSCALL32_IMPL
490 uoff_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
491 MAXOFF32_T : MAXOFFSET_T;
492 #else
493 const uoff_t maxoff = MAXOFF32_T;
494 #endif
495 int in_crit = 0;
497 if ((bcount = (ssize_t)count) < 0)
498 return (set_errno(EINVAL));
499 if ((fp = getf(fdes)) == NULL)
500 return (set_errno(EBADF));
501 if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
502 error = EBADF;
503 goto out;
506 rwflag = 1;
507 vp = fp->f_vnode;
509 if (vp->v_type == VREG) {
511 if (bcount == 0)
512 goto out;
515 * return EINVAL for offsets that cannot be
516 * represented in an off_t.
518 if (fileoff > maxoff) {
519 error = EINVAL;
520 goto out;
523 * Take appropriate action if we are trying to write above the
524 * resource limit.
526 if (fileoff >= curproc->p_fsz_ctl) {
527 mutex_enter(&curproc->p_lock);
528 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
529 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
530 mutex_exit(&curproc->p_lock);
532 error = EFBIG;
533 goto out;
536 * Don't allow pwrite to cause file sizes to exceed
537 * maxoff.
539 if (fileoff == maxoff) {
540 error = EFBIG;
541 goto out;
543 if (fileoff + count > maxoff)
544 bcount = (ssize_t)((uoff_t)maxoff - fileoff);
545 } else if (vp->v_type == VFIFO) {
546 error = ESPIPE;
547 goto out;
551 * We have to enter the critical region before calling fop_rwlock
552 * to avoid a deadlock with ufs.
554 if (nbl_need_check(vp)) {
555 int svmand;
557 nbl_start_crit(vp, RW_READER);
558 in_crit = 1;
559 error = nbl_svmand(vp, fp->f_cred, &svmand);
560 if (error != 0)
561 goto out;
562 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
563 NULL)) {
564 error = EACCES;
565 goto out;
569 aiov.iov_base = cbuf;
570 aiov.iov_len = bcount;
571 (void) fop_rwlock(vp, rwflag, NULL);
572 auio.uio_loffset = fileoff;
573 auio.uio_iov = &aiov;
574 auio.uio_iovcnt = 1;
575 auio.uio_resid = bcount;
576 auio.uio_segflg = UIO_USERSPACE;
577 auio.uio_llimit = curproc->p_fsz_ctl;
578 auio.uio_fmode = fflag;
579 auio.uio_extflg = UIO_COPY_CACHED;
582 * The SUSv4 POSIX specification states:
583 * The pwrite() function shall be equivalent to write(), except
584 * that it writes into a given position and does not change
585 * the file offset (regardless of whether O_APPEND is set).
586 * To make this be true, we omit the FAPPEND flag from ioflag.
588 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
590 error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
591 bcount -= auio.uio_resid;
592 CPU_STATS_ENTER_K();
593 cp = CPU;
594 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
595 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
596 CPU_STATS_EXIT_K();
597 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
598 fop_rwunlock(vp, rwflag, NULL);
600 if (error == EINTR && bcount != 0)
601 error = 0;
602 out:
603 if (in_crit)
604 nbl_end_crit(vp);
605 releasef(fdes);
606 if (error)
607 return (set_errno(error));
608 return (bcount);
611 ssize_t
612 readv(int fdes, struct iovec *iovp, int iovcnt)
614 struct uio auio;
615 struct iovec buf[IOV_MAX_STACK], *aiov = buf;
616 int aiovlen = 0;
617 file_t *fp;
618 register vnode_t *vp;
619 struct cpu *cp;
620 int fflag, ioflag, rwflag;
621 ssize_t count, bcount;
622 int error = 0;
623 int i;
624 uoff_t fileoff;
625 int in_crit = 0;
627 if (iovcnt <= 0 || iovcnt > IOV_MAX)
628 return (set_errno(EINVAL));
630 if (iovcnt > IOV_MAX_STACK) {
631 aiovlen = iovcnt * sizeof (iovec_t);
632 aiov = kmem_alloc(aiovlen, KM_SLEEP);
635 #ifdef _SYSCALL32_IMPL
637 * 32-bit callers need to have their iovec expanded,
638 * while ensuring that they can't move more than 2Gbytes
639 * of data in a single call.
641 if (get_udatamodel() == DATAMODEL_ILP32) {
642 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
643 int aiov32len;
644 ssize32_t count32;
646 aiov32len = iovcnt * sizeof (iovec32_t);
647 if (aiovlen != 0)
648 aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
650 if (copyin(iovp, aiov32, aiov32len)) {
651 if (aiovlen != 0) {
652 kmem_free(aiov32, aiov32len);
653 kmem_free(aiov, aiovlen);
655 return (set_errno(EFAULT));
658 count32 = 0;
659 for (i = 0; i < iovcnt; i++) {
660 ssize32_t iovlen32 = aiov32[i].iov_len;
661 count32 += iovlen32;
662 if (iovlen32 < 0 || count32 < 0) {
663 if (aiovlen != 0) {
664 kmem_free(aiov32, aiov32len);
665 kmem_free(aiov, aiovlen);
667 return (set_errno(EINVAL));
669 aiov[i].iov_len = iovlen32;
670 aiov[i].iov_base =
671 (caddr_t)(uintptr_t)aiov32[i].iov_base;
674 if (aiovlen != 0)
675 kmem_free(aiov32, aiov32len);
676 } else
677 #endif
678 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
679 if (aiovlen != 0)
680 kmem_free(aiov, aiovlen);
681 return (set_errno(EFAULT));
684 count = 0;
685 for (i = 0; i < iovcnt; i++) {
686 ssize_t iovlen = aiov[i].iov_len;
687 count += iovlen;
688 if (iovlen < 0 || count < 0) {
689 if (aiovlen != 0)
690 kmem_free(aiov, aiovlen);
691 return (set_errno(EINVAL));
694 if ((fp = getf(fdes)) == NULL) {
695 if (aiovlen != 0)
696 kmem_free(aiov, aiovlen);
697 return (set_errno(EBADF));
699 if (((fflag = fp->f_flag) & FREAD) == 0) {
700 error = EBADF;
701 goto out;
703 vp = fp->f_vnode;
704 if (vp->v_type == VREG && count == 0) {
705 goto out;
708 rwflag = 0;
711 * We have to enter the critical region before calling fop_rwlock
712 * to avoid a deadlock with ufs.
714 if (nbl_need_check(vp)) {
715 int svmand;
717 nbl_start_crit(vp, RW_READER);
718 in_crit = 1;
719 error = nbl_svmand(vp, fp->f_cred, &svmand);
720 if (error != 0)
721 goto out;
722 if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
723 NULL)) {
724 error = EACCES;
725 goto out;
729 (void) fop_rwlock(vp, rwflag, NULL);
730 fileoff = fp->f_offset;
733 * Behaviour is same as read. Please see comments in read.
736 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
737 struct vattr va;
738 va.va_mask = AT_SIZE;
739 if ((error = fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
740 fop_rwunlock(vp, rwflag, NULL);
741 goto out;
743 if (fileoff >= va.va_size) {
744 fop_rwunlock(vp, rwflag, NULL);
745 count = 0;
746 goto out;
747 } else {
748 fop_rwunlock(vp, rwflag, NULL);
749 error = EOVERFLOW;
750 goto out;
753 if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
754 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
756 auio.uio_loffset = fileoff;
757 auio.uio_iov = aiov;
758 auio.uio_iovcnt = iovcnt;
759 auio.uio_resid = bcount = count;
760 auio.uio_segflg = UIO_USERSPACE;
761 auio.uio_llimit = MAXOFFSET_T;
762 auio.uio_fmode = fflag;
763 if (bcount <= copyout_max_cached)
764 auio.uio_extflg = UIO_COPY_CACHED;
765 else
766 auio.uio_extflg = UIO_COPY_DEFAULT;
769 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
771 /* If read sync is not asked for, filter sync flags */
772 if ((ioflag & FRSYNC) == 0)
773 ioflag &= ~(FSYNC|FDSYNC);
774 error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
775 count -= auio.uio_resid;
776 CPU_STATS_ENTER_K();
777 cp = CPU;
778 CPU_STATS_ADDQ(cp, sys, sysread, 1);
779 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
780 CPU_STATS_EXIT_K();
781 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
783 if (vp->v_type == VFIFO) /* Backward compatibility */
784 fp->f_offset = count;
785 else if (((fp->f_flag & FAPPEND) == 0) ||
786 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */
787 fp->f_offset = auio.uio_loffset;
789 fop_rwunlock(vp, rwflag, NULL);
791 if (error == EINTR && count != 0)
792 error = 0;
793 out:
794 if (in_crit)
795 nbl_end_crit(vp);
796 releasef(fdes);
797 if (aiovlen != 0)
798 kmem_free(aiov, aiovlen);
799 if (error)
800 return (set_errno(error));
801 return (count);
804 ssize_t
805 writev(int fdes, struct iovec *iovp, int iovcnt)
807 struct uio auio;
808 struct iovec buf[IOV_MAX_STACK], *aiov = buf;
809 int aiovlen = 0;
810 file_t *fp;
811 register vnode_t *vp;
812 struct cpu *cp;
813 int fflag, ioflag, rwflag;
814 ssize_t count, bcount;
815 int error = 0;
816 int i;
817 uoff_t fileoff;
818 int in_crit = 0;
820 if (iovcnt <= 0 || iovcnt > IOV_MAX)
821 return (set_errno(EINVAL));
823 if (iovcnt > IOV_MAX_STACK) {
824 aiovlen = iovcnt * sizeof (iovec_t);
825 aiov = kmem_alloc(aiovlen, KM_SLEEP);
828 #ifdef _SYSCALL32_IMPL
830 * 32-bit callers need to have their iovec expanded,
831 * while ensuring that they can't move more than 2Gbytes
832 * of data in a single call.
834 if (get_udatamodel() == DATAMODEL_ILP32) {
835 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
836 int aiov32len;
837 ssize32_t count32;
839 aiov32len = iovcnt * sizeof (iovec32_t);
840 if (aiovlen != 0)
841 aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
843 if (copyin(iovp, aiov32, aiov32len)) {
844 if (aiovlen != 0) {
845 kmem_free(aiov32, aiov32len);
846 kmem_free(aiov, aiovlen);
848 return (set_errno(EFAULT));
851 count32 = 0;
852 for (i = 0; i < iovcnt; i++) {
853 ssize32_t iovlen = aiov32[i].iov_len;
854 count32 += iovlen;
855 if (iovlen < 0 || count32 < 0) {
856 if (aiovlen != 0) {
857 kmem_free(aiov32, aiov32len);
858 kmem_free(aiov, aiovlen);
860 return (set_errno(EINVAL));
862 aiov[i].iov_len = iovlen;
863 aiov[i].iov_base =
864 (caddr_t)(uintptr_t)aiov32[i].iov_base;
866 if (aiovlen != 0)
867 kmem_free(aiov32, aiov32len);
868 } else
869 #endif
870 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
871 if (aiovlen != 0)
872 kmem_free(aiov, aiovlen);
873 return (set_errno(EFAULT));
876 count = 0;
877 for (i = 0; i < iovcnt; i++) {
878 ssize_t iovlen = aiov[i].iov_len;
879 count += iovlen;
880 if (iovlen < 0 || count < 0) {
881 if (aiovlen != 0)
882 kmem_free(aiov, aiovlen);
883 return (set_errno(EINVAL));
886 if ((fp = getf(fdes)) == NULL) {
887 if (aiovlen != 0)
888 kmem_free(aiov, aiovlen);
889 return (set_errno(EBADF));
891 if (((fflag = fp->f_flag) & FWRITE) == 0) {
892 error = EBADF;
893 goto out;
895 vp = fp->f_vnode;
896 if (vp->v_type == VREG && count == 0) {
897 goto out;
900 rwflag = 1;
903 * We have to enter the critical region before calling fop_rwlock
904 * to avoid a deadlock with ufs.
906 if (nbl_need_check(vp)) {
907 int svmand;
909 nbl_start_crit(vp, RW_READER);
910 in_crit = 1;
911 error = nbl_svmand(vp, fp->f_cred, &svmand);
912 if (error != 0)
913 goto out;
914 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
915 NULL)) {
916 error = EACCES;
917 goto out;
921 (void) fop_rwlock(vp, rwflag, NULL);
923 fileoff = fp->f_offset;
926 * Behaviour is same as write. Please see comments for write.
929 if (vp->v_type == VREG) {
930 if (fileoff >= curproc->p_fsz_ctl) {
931 fop_rwunlock(vp, rwflag, NULL);
932 mutex_enter(&curproc->p_lock);
933 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
934 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
935 mutex_exit(&curproc->p_lock);
936 error = EFBIG;
937 goto out;
939 if (fileoff >= OFFSET_MAX(fp)) {
940 fop_rwunlock(vp, rwflag, NULL);
941 error = EFBIG;
942 goto out;
944 if (fileoff + count > OFFSET_MAX(fp))
945 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
947 auio.uio_loffset = fileoff;
948 auio.uio_iov = aiov;
949 auio.uio_iovcnt = iovcnt;
950 auio.uio_resid = bcount = count;
951 auio.uio_segflg = UIO_USERSPACE;
952 auio.uio_llimit = curproc->p_fsz_ctl;
953 auio.uio_fmode = fflag;
954 auio.uio_extflg = UIO_COPY_DEFAULT;
956 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
958 error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
959 count -= auio.uio_resid;
960 CPU_STATS_ENTER_K();
961 cp = CPU;
962 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
963 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
964 CPU_STATS_EXIT_K();
965 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
967 if (vp->v_type == VFIFO) /* Backward compatibility */
968 fp->f_offset = count;
969 else if (((fp->f_flag & FAPPEND) == 0) ||
970 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */
971 fp->f_offset = auio.uio_loffset;
972 fop_rwunlock(vp, rwflag, NULL);
974 if (error == EINTR && count != 0)
975 error = 0;
976 out:
977 if (in_crit)
978 nbl_end_crit(vp);
979 releasef(fdes);
980 if (aiovlen != 0)
981 kmem_free(aiov, aiovlen);
982 if (error)
983 return (set_errno(error));
984 return (count);
987 ssize_t
988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
989 off_t extended_offset)
991 struct uio auio;
992 struct iovec buf[IOV_MAX_STACK], *aiov = buf;
993 int aiovlen = 0;
994 file_t *fp;
995 register vnode_t *vp;
996 struct cpu *cp;
997 int fflag, ioflag, rwflag;
998 ssize_t count, bcount;
999 int error = 0;
1000 int i;
1002 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1003 uoff_t fileoff = ((uoff_t)extended_offset << 32) |
1004 (uoff_t)offset;
1005 #else /* _SYSCALL32_IMPL || _ILP32 */
1006 uoff_t fileoff = (uoff_t)(ulong_t)offset;
1007 #endif /* _SYSCALL32_IMPR || _ILP32 */
1008 #ifdef _SYSCALL32_IMPL
1009 const uoff_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1010 extended_offset == 0?
1011 MAXOFF32_T : MAXOFFSET_T;
1012 #else /* _SYSCALL32_IMPL */
1013 const uoff_t maxoff = MAXOFF32_T;
1014 #endif /* _SYSCALL32_IMPL */
1016 int in_crit = 0;
1018 if (iovcnt <= 0 || iovcnt > IOV_MAX)
1019 return (set_errno(EINVAL));
1021 if (iovcnt > IOV_MAX_STACK) {
1022 aiovlen = iovcnt * sizeof (iovec_t);
1023 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1026 #ifdef _SYSCALL32_IMPL
1028 * 32-bit callers need to have their iovec expanded,
1029 * while ensuring that they can't move more than 2Gbytes
1030 * of data in a single call.
1032 if (get_udatamodel() == DATAMODEL_ILP32) {
1033 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1034 int aiov32len;
1035 ssize32_t count32;
1037 aiov32len = iovcnt * sizeof (iovec32_t);
1038 if (aiovlen != 0)
1039 aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1041 if (copyin(iovp, aiov32, aiov32len)) {
1042 if (aiovlen != 0) {
1043 kmem_free(aiov32, aiov32len);
1044 kmem_free(aiov, aiovlen);
1046 return (set_errno(EFAULT));
1049 count32 = 0;
1050 for (i = 0; i < iovcnt; i++) {
1051 ssize32_t iovlen32 = aiov32[i].iov_len;
1052 count32 += iovlen32;
1053 if (iovlen32 < 0 || count32 < 0) {
1054 if (aiovlen != 0) {
1055 kmem_free(aiov32, aiov32len);
1056 kmem_free(aiov, aiovlen);
1058 return (set_errno(EINVAL));
1060 aiov[i].iov_len = iovlen32;
1061 aiov[i].iov_base =
1062 (caddr_t)(uintptr_t)aiov32[i].iov_base;
1064 if (aiovlen != 0)
1065 kmem_free(aiov32, aiov32len);
1066 } else
1067 #endif /* _SYSCALL32_IMPL */
1068 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1069 if (aiovlen != 0)
1070 kmem_free(aiov, aiovlen);
1071 return (set_errno(EFAULT));
1074 count = 0;
1075 for (i = 0; i < iovcnt; i++) {
1076 ssize_t iovlen = aiov[i].iov_len;
1077 count += iovlen;
1078 if (iovlen < 0 || count < 0) {
1079 if (aiovlen != 0)
1080 kmem_free(aiov, aiovlen);
1081 return (set_errno(EINVAL));
1085 if ((bcount = (ssize_t)count) < 0) {
1086 if (aiovlen != 0)
1087 kmem_free(aiov, aiovlen);
1088 return (set_errno(EINVAL));
1090 if ((fp = getf(fdes)) == NULL) {
1091 if (aiovlen != 0)
1092 kmem_free(aiov, aiovlen);
1093 return (set_errno(EBADF));
1095 if (((fflag = fp->f_flag) & FREAD) == 0) {
1096 error = EBADF;
1097 goto out;
1099 vp = fp->f_vnode;
1100 rwflag = 0;
1101 if (vp->v_type == VREG) {
1103 if (bcount == 0)
1104 goto out;
1107 * return EINVAL for offsets that cannot be
1108 * represented in an off_t.
1110 if (fileoff > maxoff) {
1111 error = EINVAL;
1112 goto out;
1115 if (fileoff + bcount > maxoff)
1116 bcount = (ssize_t)((uoff_t)maxoff - fileoff);
1117 } else if (vp->v_type == VFIFO) {
1118 error = ESPIPE;
1119 goto out;
1122 * We have to enter the critical region before calling fop_rwlock
1123 * to avoid a deadlock with ufs.
1125 if (nbl_need_check(vp)) {
1126 int svmand;
1128 nbl_start_crit(vp, RW_READER);
1129 in_crit = 1;
1130 error = nbl_svmand(vp, fp->f_cred, &svmand);
1131 if (error != 0)
1132 goto out;
1133 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1134 NULL)) {
1135 error = EACCES;
1136 goto out;
1140 (void) fop_rwlock(vp, rwflag, NULL);
1143 * Behaviour is same as read(2). Please see comments in
1144 * read(2).
1147 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1148 struct vattr va;
1149 va.va_mask = AT_SIZE;
1150 if ((error =
1151 fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
1152 fop_rwunlock(vp, rwflag, NULL);
1153 goto out;
1155 if (fileoff >= va.va_size) {
1156 fop_rwunlock(vp, rwflag, NULL);
1157 count = 0;
1158 goto out;
1159 } else {
1160 fop_rwunlock(vp, rwflag, NULL);
1161 error = EOVERFLOW;
1162 goto out;
1165 if ((vp->v_type == VREG) &&
1166 (fileoff + count > OFFSET_MAX(fp))) {
1167 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1169 auio.uio_loffset = fileoff;
1170 auio.uio_iov = aiov;
1171 auio.uio_iovcnt = iovcnt;
1172 auio.uio_resid = bcount = count;
1173 auio.uio_segflg = UIO_USERSPACE;
1174 auio.uio_llimit = MAXOFFSET_T;
1175 auio.uio_fmode = fflag;
1176 if (bcount <= copyout_max_cached)
1177 auio.uio_extflg = UIO_COPY_CACHED;
1178 else
1179 auio.uio_extflg = UIO_COPY_DEFAULT;
1181 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1182 error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
1183 count -= auio.uio_resid;
1184 CPU_STATS_ENTER_K();
1185 cp = CPU;
1186 CPU_STATS_ADDQ(cp, sys, sysread, 1);
1187 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1188 CPU_STATS_EXIT_K();
1189 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1191 fop_rwunlock(vp, rwflag, NULL);
1193 if (error == EINTR && count != 0)
1194 error = 0;
1195 out:
1196 if (in_crit)
1197 nbl_end_crit(vp);
1198 releasef(fdes);
1199 if (aiovlen != 0)
1200 kmem_free(aiov, aiovlen);
1201 if (error)
1202 return (set_errno(error));
1203 return (count);
1206 ssize_t
1207 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1208 off_t extended_offset)
1210 struct uio auio;
1211 struct iovec buf[IOV_MAX_STACK], *aiov = buf;
1212 int aiovlen = 0;
1213 file_t *fp;
1214 register vnode_t *vp;
1215 struct cpu *cp;
1216 int fflag, ioflag, rwflag;
1217 ssize_t count, bcount;
1218 int error = 0;
1219 int i;
1221 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1222 uoff_t fileoff = ((uoff_t)extended_offset << 32) |
1223 (uoff_t)offset;
1224 #else /* _SYSCALL32_IMPL || _ILP32 */
1225 uoff_t fileoff = (uoff_t)(ulong_t)offset;
1226 #endif /* _SYSCALL32_IMPR || _ILP32 */
1227 #ifdef _SYSCALL32_IMPL
1228 const uoff_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1229 extended_offset == 0?
1230 MAXOFF32_T : MAXOFFSET_T;
1231 #else /* _SYSCALL32_IMPL */
1232 const uoff_t maxoff = MAXOFF32_T;
1233 #endif /* _SYSCALL32_IMPL */
1235 int in_crit = 0;
1237 if (iovcnt <= 0 || iovcnt > IOV_MAX)
1238 return (set_errno(EINVAL));
1240 if (iovcnt > IOV_MAX_STACK) {
1241 aiovlen = iovcnt * sizeof (iovec_t);
1242 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1245 #ifdef _SYSCALL32_IMPL
1247 * 32-bit callers need to have their iovec expanded,
1248 * while ensuring that they can't move more than 2Gbytes
1249 * of data in a single call.
1251 if (get_udatamodel() == DATAMODEL_ILP32) {
1252 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1253 int aiov32len;
1254 ssize32_t count32;
1256 aiov32len = iovcnt * sizeof (iovec32_t);
1257 if (aiovlen != 0)
1258 aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1260 if (copyin(iovp, aiov32, aiov32len)) {
1261 if (aiovlen != 0) {
1262 kmem_free(aiov32, aiov32len);
1263 kmem_free(aiov, aiovlen);
1265 return (set_errno(EFAULT));
1268 count32 = 0;
1269 for (i = 0; i < iovcnt; i++) {
1270 ssize32_t iovlen32 = aiov32[i].iov_len;
1271 count32 += iovlen32;
1272 if (iovlen32 < 0 || count32 < 0) {
1273 if (aiovlen != 0) {
1274 kmem_free(aiov32, aiov32len);
1275 kmem_free(aiov, aiovlen);
1277 return (set_errno(EINVAL));
1279 aiov[i].iov_len = iovlen32;
1280 aiov[i].iov_base =
1281 (caddr_t)(uintptr_t)aiov32[i].iov_base;
1283 if (aiovlen != 0)
1284 kmem_free(aiov32, aiov32len);
1285 } else
1286 #endif /* _SYSCALL32_IMPL */
1287 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1288 if (aiovlen != 0)
1289 kmem_free(aiov, aiovlen);
1290 return (set_errno(EFAULT));
1293 count = 0;
1294 for (i = 0; i < iovcnt; i++) {
1295 ssize_t iovlen = aiov[i].iov_len;
1296 count += iovlen;
1297 if (iovlen < 0 || count < 0) {
1298 if (aiovlen != 0)
1299 kmem_free(aiov, aiovlen);
1300 return (set_errno(EINVAL));
1304 if ((bcount = (ssize_t)count) < 0) {
1305 if (aiovlen != 0)
1306 kmem_free(aiov, aiovlen);
1307 return (set_errno(EINVAL));
1309 if ((fp = getf(fdes)) == NULL) {
1310 if (aiovlen != 0)
1311 kmem_free(aiov, aiovlen);
1312 return (set_errno(EBADF));
1314 if (((fflag = fp->f_flag) & FWRITE) == 0) {
1315 error = EBADF;
1316 goto out;
1318 vp = fp->f_vnode;
1319 rwflag = 1;
1320 if (vp->v_type == VREG) {
1322 if (bcount == 0)
1323 goto out;
1326 * return EINVAL for offsets that cannot be
1327 * represented in an off_t.
1329 if (fileoff > maxoff) {
1330 error = EINVAL;
1331 goto out;
1334 * Take appropriate action if we are trying
1335 * to write above the resource limit.
1337 if (fileoff >= curproc->p_fsz_ctl) {
1338 mutex_enter(&curproc->p_lock);
1340 * Return value ignored because it lists
1341 * actions taken, but we are in an error case.
1342 * We don't have any actions that depend on
1343 * what could happen in this call, so we ignore
1344 * the return value.
1346 (void) rctl_action(
1347 rctlproc_legacy[RLIMIT_FSIZE],
1348 curproc->p_rctls, curproc,
1349 RCA_UNSAFE_SIGINFO);
1350 mutex_exit(&curproc->p_lock);
1352 error = EFBIG;
1353 goto out;
1356 * Don't allow pwritev to cause file sizes to exceed
1357 * maxoff.
1359 if (fileoff == maxoff) {
1360 error = EFBIG;
1361 goto out;
1364 if (fileoff + bcount > maxoff)
1365 bcount = (ssize_t)((uoff_t)maxoff - fileoff);
1366 } else if (vp->v_type == VFIFO) {
1367 error = ESPIPE;
1368 goto out;
1371 * We have to enter the critical region before calling fop_rwlock
1372 * to avoid a deadlock with ufs.
1374 if (nbl_need_check(vp)) {
1375 int svmand;
1377 nbl_start_crit(vp, RW_READER);
1378 in_crit = 1;
1379 error = nbl_svmand(vp, fp->f_cred, &svmand);
1380 if (error != 0)
1381 goto out;
1382 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1383 NULL)) {
1384 error = EACCES;
1385 goto out;
1389 (void) fop_rwlock(vp, rwflag, NULL);
1393 * Behaviour is same as write(2). Please see comments for
1394 * write(2).
1397 if (vp->v_type == VREG) {
1398 if (fileoff >= curproc->p_fsz_ctl) {
1399 fop_rwunlock(vp, rwflag, NULL);
1400 mutex_enter(&curproc->p_lock);
1401 /* see above rctl_action comment */
1402 (void) rctl_action(
1403 rctlproc_legacy[RLIMIT_FSIZE],
1404 curproc->p_rctls,
1405 curproc, RCA_UNSAFE_SIGINFO);
1406 mutex_exit(&curproc->p_lock);
1407 error = EFBIG;
1408 goto out;
1410 if (fileoff >= OFFSET_MAX(fp)) {
1411 fop_rwunlock(vp, rwflag, NULL);
1412 error = EFBIG;
1413 goto out;
1415 if (fileoff + count > OFFSET_MAX(fp))
1416 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1419 auio.uio_loffset = fileoff;
1420 auio.uio_iov = aiov;
1421 auio.uio_iovcnt = iovcnt;
1422 auio.uio_resid = bcount = count;
1423 auio.uio_segflg = UIO_USERSPACE;
1424 auio.uio_llimit = curproc->p_fsz_ctl;
1425 auio.uio_fmode = fflag;
1426 auio.uio_extflg = UIO_COPY_CACHED;
1427 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1428 error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
1429 count -= auio.uio_resid;
1430 CPU_STATS_ENTER_K();
1431 cp = CPU;
1432 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1433 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1434 CPU_STATS_EXIT_K();
1435 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1437 fop_rwunlock(vp, rwflag, NULL);
1439 if (error == EINTR && count != 0)
1440 error = 0;
1441 out:
1442 if (in_crit)
1443 nbl_end_crit(vp);
1444 releasef(fdes);
1445 if (aiovlen != 0)
1446 kmem_free(aiov, aiovlen);
1447 if (error)
1448 return (set_errno(error));
1449 return (count);
1452 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1455 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1457 ssize32_t
1458 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1459 uint32_t offset_2)
1461 struct uio auio;
1462 struct iovec aiov;
1463 file_t *fp;
1464 register vnode_t *vp;
1465 struct cpu *cp;
1466 int fflag, ioflag, rwflag;
1467 ssize_t bcount;
1468 int error = 0;
1469 uoff_t fileoff;
1470 int in_crit = 0;
1472 #if defined(_LITTLE_ENDIAN)
1473 fileoff = ((uoff_t)offset_2 << 32) | (uoff_t)offset_1;
1474 #else
1475 fileoff = ((uoff_t)offset_1 << 32) | (uoff_t)offset_2;
1476 #endif
1478 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
1479 return (set_errno(EINVAL));
1481 if ((fp = getf(fdes)) == NULL)
1482 return (set_errno(EBADF));
1483 if (((fflag = fp->f_flag) & (FREAD)) == 0) {
1484 error = EBADF;
1485 goto out;
1488 rwflag = 0;
1489 vp = fp->f_vnode;
1491 if (vp->v_type == VREG) {
1493 if (bcount == 0)
1494 goto out;
1497 * Same as pread. See comments in pread.
1500 if (fileoff > MAXOFFSET_T) {
1501 error = EINVAL;
1502 goto out;
1504 if (fileoff + bcount > MAXOFFSET_T)
1505 bcount = (ssize_t)(MAXOFFSET_T - fileoff);
1506 } else if (vp->v_type == VFIFO) {
1507 error = ESPIPE;
1508 goto out;
1512 * We have to enter the critical region before calling fop_rwlock
1513 * to avoid a deadlock with ufs.
1515 if (nbl_need_check(vp)) {
1516 int svmand;
1518 nbl_start_crit(vp, RW_READER);
1519 in_crit = 1;
1520 error = nbl_svmand(vp, fp->f_cred, &svmand);
1521 if (error != 0)
1522 goto out;
1523 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
1524 NULL)) {
1525 error = EACCES;
1526 goto out;
1530 aiov.iov_base = cbuf;
1531 aiov.iov_len = bcount;
1532 (void) fop_rwlock(vp, rwflag, NULL);
1533 auio.uio_loffset = fileoff;
1536 * Note: File size can never be greater than MAXOFFSET_T.
1537 * If ever we start supporting 128 bit files the code
1538 * similar to the one in pread at this place should be here.
1539 * Here we avoid the unnecessary fop_getattr() when we
1540 * know that fileoff == MAXOFFSET_T implies that it is always
1541 * greater than or equal to file size.
1543 auio.uio_iov = &aiov;
1544 auio.uio_iovcnt = 1;
1545 auio.uio_resid = bcount;
1546 auio.uio_segflg = UIO_USERSPACE;
1547 auio.uio_llimit = MAXOFFSET_T;
1548 auio.uio_fmode = fflag;
1549 auio.uio_extflg = UIO_COPY_CACHED;
1551 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1553 /* If read sync is not asked for, filter sync flags */
1554 if ((ioflag & FRSYNC) == 0)
1555 ioflag &= ~(FSYNC|FDSYNC);
1556 error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
1557 bcount -= auio.uio_resid;
1558 CPU_STATS_ENTER_K();
1559 cp = CPU;
1560 CPU_STATS_ADDQ(cp, sys, sysread, 1);
1561 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
1562 CPU_STATS_EXIT_K();
1563 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
1564 fop_rwunlock(vp, rwflag, NULL);
1566 if (error == EINTR && bcount != 0)
1567 error = 0;
1568 out:
1569 if (in_crit)
1570 nbl_end_crit(vp);
1571 releasef(fdes);
1572 if (error)
1573 return (set_errno(error));
1574 return (bcount);
1578 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1580 ssize32_t
1581 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1582 uint32_t offset_2)
1584 struct uio auio;
1585 struct iovec aiov;
1586 file_t *fp;
1587 register vnode_t *vp;
1588 struct cpu *cp;
1589 int fflag, ioflag, rwflag;
1590 ssize_t bcount;
1591 int error = 0;
1592 uoff_t fileoff;
1593 int in_crit = 0;
1595 #if defined(_LITTLE_ENDIAN)
1596 fileoff = ((uoff_t)offset_2 << 32) | (uoff_t)offset_1;
1597 #else
1598 fileoff = ((uoff_t)offset_1 << 32) | (uoff_t)offset_2;
1599 #endif
1601 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
1602 return (set_errno(EINVAL));
1603 if ((fp = getf(fdes)) == NULL)
1604 return (set_errno(EBADF));
1605 if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
1606 error = EBADF;
1607 goto out;
1610 rwflag = 1;
1611 vp = fp->f_vnode;
1613 if (vp->v_type == VREG) {
1615 if (bcount == 0)
1616 goto out;
1619 * See comments in pwrite.
1621 if (fileoff > MAXOFFSET_T) {
1622 error = EINVAL;
1623 goto out;
1625 if (fileoff >= curproc->p_fsz_ctl) {
1626 mutex_enter(&curproc->p_lock);
1627 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
1628 curproc->p_rctls, curproc, RCA_SAFE);
1629 mutex_exit(&curproc->p_lock);
1630 error = EFBIG;
1631 goto out;
1633 if (fileoff == MAXOFFSET_T) {
1634 error = EFBIG;
1635 goto out;
1637 if (fileoff + bcount > MAXOFFSET_T)
1638 bcount = (ssize_t)((uoff_t)MAXOFFSET_T - fileoff);
1639 } else if (vp->v_type == VFIFO) {
1640 error = ESPIPE;
1641 goto out;
1645 * We have to enter the critical region before calling fop_rwlock
1646 * to avoid a deadlock with ufs.
1648 if (nbl_need_check(vp)) {
1649 int svmand;
1651 nbl_start_crit(vp, RW_READER);
1652 in_crit = 1;
1653 error = nbl_svmand(vp, fp->f_cred, &svmand);
1654 if (error != 0)
1655 goto out;
1656 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
1657 NULL)) {
1658 error = EACCES;
1659 goto out;
1663 aiov.iov_base = cbuf;
1664 aiov.iov_len = bcount;
1665 (void) fop_rwlock(vp, rwflag, NULL);
1666 auio.uio_loffset = fileoff;
1667 auio.uio_iov = &aiov;
1668 auio.uio_iovcnt = 1;
1669 auio.uio_resid = bcount;
1670 auio.uio_segflg = UIO_USERSPACE;
1671 auio.uio_llimit = curproc->p_fsz_ctl;
1672 auio.uio_fmode = fflag;
1673 auio.uio_extflg = UIO_COPY_CACHED;
1676 * The SUSv4 POSIX specification states:
1677 * The pwrite() function shall be equivalent to write(), except
1678 * that it writes into a given position and does not change
1679 * the file offset (regardless of whether O_APPEND is set).
1680 * To make this be true, we omit the FAPPEND flag from ioflag.
1682 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1684 error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
1685 bcount -= auio.uio_resid;
1686 CPU_STATS_ENTER_K();
1687 cp = CPU;
1688 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1689 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
1690 CPU_STATS_EXIT_K();
1691 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
1692 fop_rwunlock(vp, rwflag, NULL);
1694 if (error == EINTR && bcount != 0)
1695 error = 0;
1696 out:
1697 if (in_crit)
1698 nbl_end_crit(vp);
1699 releasef(fdes);
1700 if (error)
1701 return (set_errno(error));
1702 return (bcount);
1705 #endif /* _SYSCALL32_IMPL || _ILP32 */
1707 #ifdef _SYSCALL32_IMPL
/*
 * Tail-call elimination of xxx32() down to xxx()
 *
 * A number of xxx32 system calls take a len (or count) argument and
 * return a number in the range [0,len] or -1 on error.
 * Given an ssize32_t input len, the downcall xxx() will return
 * a 64-bit value that is -1 or in the range [0,len] which actually
 * is a proper return value for the xxx32 call. So even if the xxx32
 * calls can be considered as returning a ssize32_t, they are currently
 * declared as returning a ssize_t as this enables tail-call elimination.
 *
 * The cast of len (or count) to ssize32_t is needed to ensure we pass
 * down negative input values as such and let the downcall handle error
 * reporting. Functions covered by this comment are:
 *
 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32.
 * socksyscall.c: recv32, recvfrom32, send32, sendto32.
 * readlink.c: readlink32.
 */
1728 ssize_t
1729 read32(int32_t fdes, caddr32_t cbuf, size32_t count)
1731 return (read(fdes,
1732 (void *)(uintptr_t)cbuf, (ssize32_t)count));
1735 ssize_t
1736 write32(int32_t fdes, caddr32_t cbuf, size32_t count)
1738 return (write(fdes,
1739 (void *)(uintptr_t)cbuf, (ssize32_t)count));
1742 ssize_t
1743 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1745 return (pread(fdes,
1746 (void *)(uintptr_t)cbuf, (ssize32_t)count,
1747 (off_t)(uint32_t)offset));
1750 ssize_t
1751 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
1753 return (pwrite(fdes,
1754 (void *)(uintptr_t)cbuf, (ssize32_t)count,
1755 (off_t)(uint32_t)offset));
1758 ssize_t
1759 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1761 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
1764 ssize_t
1765 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
1767 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
1769 #endif /* _SYSCALL32_IMPL */