Unleashed v1.4: unleashed.git / kernel/syscall/rw.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015, Joyent, Inc.  All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/sysmacros.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/cpuvar.h>
#include <sys/uio.h>
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>
#include <sys/limits.h>

#define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */

size_t copyout_max_cached = COPYOUT_MAX_CACHE;	/* global so it's patchable */
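
/*
 * Since copyout_max_cached is a plain global, the 128K threshold can be
 * tuned without rebuilding the kernel.  As an illustrative sketch only
 * (the invocation is an assumption based on the usual illumos-style mdb
 * workflow, not something this file defines), halving the threshold on a
 * live system might look like:
 *
 *	# echo 'copyout_max_cached/Z 0x10000' | mdb -kw
 */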

/*
 * read, write, pread, pwrite, readv, and writev syscalls.
 */

/*
 * Native system call
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	uoff_t fileoff;
	int in_crit = 0;

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) fop_rwlock(vp, rwflag, NULL);

	/*
	 * We do the following checks inside fop_rwlock so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (uoff_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = VATTR_SIZE;
		if ((error = fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
			fop_rwunlock(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			cnt = 0;
			fop_rwunlock(vp, rwflag, NULL);
			goto out;
		} else {
			error = EOVERFLOW;
			fop_rwunlock(vp, rwflag, NULL);
			goto out;
		}
	}
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
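
/*
 * A note on the EINTR handling above: once some bytes have been
 * transferred, an interrupted read(2) reports the partial count rather
 * than failing, and it may also return fewer bytes than requested for
 * other reasons.  Userland callers therefore loop for full transfers.
 * A minimal userland sketch (illustrative only, not part of this file;
 * read_fully is a hypothetical helper):
 *
 *	#include <unistd.h>
 *	#include <errno.h>
 *
 *	ssize_t
 *	read_fully(int fd, void *buf, size_t len)
 *	{
 *		size_t done = 0;
 *
 *		while (done < len) {
 *			ssize_t n = read(fd, (char *)buf + done,
 *			    len - done);
 *			if (n == 0)
 *				break;			// EOF
 *			if (n == -1) {
 *				if (errno == EINTR)
 *					continue;	// retry
 *				return (-1);
 *			}
 *			done += n;
 *		}
 *		return ((ssize_t)done);
 *	}
 */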

/*
 * Native system call
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	uoff_t fileoff;
	int in_crit = 0;

	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) fop_rwlock(vp, rwflag, NULL);

	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			fop_rwunlock(vp, rwflag, NULL);

			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			fop_rwunlock(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written up to the offset maximum
		 * for this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}

	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
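
/*
 * The f_offset update above implements the POSIX rule that an O_APPEND
 * write on a regular file lands at end-of-file and then advances the
 * file offset to the new end.  A minimal userland sketch of the
 * observable behaviour (illustrative only, not part of this file):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/tmp/log", O_WRONLY | O_APPEND | O_CREAT, 0644);
 *	(void) write(fd, "entry\n", 6);		// lands at EOF
 *	off_t pos = lseek(fd, 0, SEEK_CUR);	// just past the new EOF
 */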

ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	uoff_t fileoff = (uoff_t)(ulong_t)offset;
	const uoff_t maxoff = MAXOFFSET_T;
	int in_crit = 0;

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * A negative offset from the user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) fop_rwlock(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (uoff_t)maxoff) {
		struct vattr va;
		va.va_mask = VATTR_SIZE;
		if ((error = fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
			fop_rwunlock(vp, rwflag, NULL);
			goto out;
		}
		fop_rwunlock(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	uoff_t fileoff = (uoff_t)(ulong_t)offset;
	uoff_t maxoff = MAXOFFSET_T;
	int in_crit = 0;

	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above the
		 * resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((uoff_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) fop_rwlock(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 *	The pwrite() function shall be equivalent to write(), except
	 *	that it writes into a given position and does not change
	 *	the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
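
/*
 * The FAPPEND omission above is observable from userland: pwrite(2)
 * neither honours O_APPEND nor moves the file offset.  A hedged
 * illustrative sketch (not part of this file):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/tmp/data", O_RDWR | O_APPEND);
 *	(void) pwrite(fd, "X", 1, 0);	// writes at offset 0 despite
 *					// O_APPEND
 *	off_t pos = lseek(fd, 0, SEEK_CUR);	// still 0
 */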

ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	uoff_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}

		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EFAULT));
	}

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) fop_rwlock(vp, rwflag, NULL);
	fileoff = fp->f_offset;

	/*
	 * Behaviour is the same as read(2); please see the comments there.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = VATTR_SIZE;
		if ((error = fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
			fop_rwunlock(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			fop_rwunlock(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			fop_rwunlock(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}
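
/*
 * For reference, the iovec array validated above is assembled by the
 * caller.  A minimal userland sketch of a two-segment scatter read
 * (illustrative only; fd, hdr and body are placeholders):
 *
 *	#include <sys/uio.h>
 *
 *	char hdr[16];
 *	char body[512];
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr, .iov_len = sizeof (hdr) },
 *		{ .iov_base = body, .iov_len = sizeof (body) },
 *	};
 *	ssize_t n = readv(fd, iov, 2);
 *	(hdr is filled first, then body, in array order)
 */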

ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	uoff_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			if (iovlen < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}

		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif
	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EFAULT));
	}

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) fop_rwlock(vp, rwflag, NULL);

	fileoff = fp->f_offset;

	/*
	 * Behaviour is the same as write(2); please see the comments there.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			fop_rwunlock(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			fop_rwunlock(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}

ssize_t
preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
    off_t extended_offset)
{
	struct uio auio;
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
	uoff_t fileoff = ((uoff_t)extended_offset << 32) |
	    (uoff_t)offset;
#else /* _SYSCALL32_IMPL || _ILP32 */
	uoff_t fileoff = (uoff_t)(ulong_t)offset;
#endif /* _SYSCALL32_IMPL || _ILP32 */
	const uoff_t maxoff = MAXOFFSET_T;

	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}

		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif /* _SYSCALL32_IMPL */
	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EFAULT));
	}

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}

	if ((bcount = (ssize_t)count) < 0) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	rwflag = 0;
	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}

		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((uoff_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}
	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) fop_rwlock(vp, rwflag, NULL);

	/*
	 * Behaviour is the same as read(2); please see the comments
	 * there.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = VATTR_SIZE;
		if ((error =
		    fop_getattr(vp, &va, 0, fp->f_cred, NULL))) {
			fop_rwunlock(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			fop_rwunlock(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			fop_rwunlock(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	if ((vp->v_type == VREG) &&
	    (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
	error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}

ssize_t
pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
    off_t extended_offset)
{
	struct uio auio;
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
	uoff_t fileoff = ((uoff_t)extended_offset << 32) |
	    (uoff_t)offset;
#else /* _SYSCALL32_IMPL || _ILP32 */
	uoff_t fileoff = (uoff_t)(ulong_t)offset;
#endif /* _SYSCALL32_IMPL || _ILP32 */
	const uoff_t maxoff = MAXOFFSET_T;

	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}

		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif /* _SYSCALL32_IMPL */
	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EFAULT));
	}

	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}

	if ((bcount = (ssize_t)count) < 0) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EINVAL));
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	rwflag = 1;
	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying
		 * to write above the resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			/*
			 * Return value ignored because it lists
			 * actions taken, but we are in an error case.
			 * We don't have any actions that depend on
			 * what could happen in this call, so we ignore
			 * the return value.
			 */
			(void) rctl_action(
			    rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc,
			    RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwritev to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}

		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((uoff_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}
	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) fop_rwlock(vp, rwflag, NULL);

	/*
	 * Behaviour is the same as write(2); please see the comments
	 * there.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			fop_rwunlock(vp, rwflag, NULL);
			mutex_enter(&curproc->p_lock);
			/* see above rctl_action comment */
			(void) rctl_action(
			    rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls,
			    curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			fop_rwunlock(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}

	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
	error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	uoff_t fileoff;
	int in_crit = 0;

#if defined(_LITTLE_ENDIAN)
	fileoff = ((uoff_t)offset_2 << 32) | (uoff_t)offset_1;
#else
	fileoff = ((uoff_t)offset_1 << 32) | (uoff_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) fop_rwlock(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: file size can never be greater than MAXOFFSET_T.
	 * If we ever start supporting 128-bit files, code similar
	 * to the corresponding block in pread should go here.
	 * We avoid the unnecessary fop_getattr() here because we
	 * know that fileoff == MAXOFFSET_T implies fileoff is always
	 * greater than or equal to the file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = fop_read(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
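
/*
 * The two 32-bit offset arguments above carry a single 64-bit file
 * offset; on a little-endian machine offset_1 holds the low word and
 * offset_2 the high word.  A hedged sketch of how a 32-bit caller (or
 * its libc stub) might split the offset (an assumption for illustration,
 * not taken from any particular libc):
 *
 *	uint64_t off = ...;
 *	uint32_t offset_1 = (uint32_t)(off & 0xffffffffULL);	// low word
 *	uint32_t offset_2 = (uint32_t)(off >> 32);		// high word
 */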

/*
 * This syscall supplies 64-bit file offsets to 32-bit applications only.
 */
ssize32_t
pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	uoff_t fileoff;
	int in_crit = 0;

#if defined(_LITTLE_ENDIAN)
	fileoff = ((uoff_t)offset_2 << 32) | (uoff_t)offset_1;
#else
	fileoff = ((uoff_t)offset_1 << 32) | (uoff_t)offset_2;
#endif

	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * See comments in pwrite.
		 */
		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff == MAXOFFSET_T) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)((uoff_t)MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) fop_rwlock(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 *	The pwrite() function shall be equivalent to write(), except
	 *	that it writes into a given position and does not change
	 *	the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = fop_write(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	fop_rwunlock(vp, rwflag, NULL);

	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}

#endif /* _SYSCALL32_IMPL || _ILP32 */

#ifdef _SYSCALL32_IMPL
/*
 * Tail-call elimination of xxx32() down to xxx()
 *
 * A number of xxx32 system calls take a len (or count) argument and
 * return a number in the range [0,len] or -1 on error.
 * Given an ssize32_t input len, the downcall xxx() will return
 * a 64-bit value that is -1 or in the range [0,len] which actually
 * is a proper return value for the xxx32 call. So even if the xxx32
 * calls can be considered as returning a ssize32_t, they are currently
 * declared as returning a ssize_t as this enables tail-call elimination.
 *
 * The cast of len (or count) to ssize32_t is needed to ensure we pass
 * down negative input values as such and let the downcall handle error
 * reporting. Functions covered by this comment are:
 *
 *	rw.c:		read32, write32, readv32, writev32.
 *	socksyscall.c:	recv32, recvfrom32, send32, sendto32.
 *	readlink.c:	readlink32.
 */

ssize_t
read32(int32_t fdes, caddr32_t cbuf, size32_t count)
{
	return (read(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
}

ssize_t
write32(int32_t fdes, caddr32_t cbuf, size32_t count)
{
	return (write(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
}

ssize_t
readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
{
	return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
}

ssize_t
writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
{
	return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
}
#endif /* _SYSCALL32_IMPL */