kernel - Use soft-busy in vop_helper_read_shortcut()
[dragonfly.git] / sys / kern / vfs_helper.c
blobcb8190ebecb17bcf6bedfcd84d413e4574f8b7dd
1 /*
2 * (The copyright below applies to ufs_access())
4 * Copyright (c) 1982, 1986, 1989, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
37 * $DragonFly: src/sys/kern/vfs_helper.c,v 1.5 2008/05/25 18:34:46 dillon Exp $
40 #include "opt_quota.h"
41 #include "opt_suiddir.h"
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/conf.h>
46 #include <sys/kernel.h>
47 #include <sys/fcntl.h>
48 #include <sys/stat.h>
49 #include <sys/mount.h>
50 #include <sys/unistd.h>
51 #include <sys/vnode.h>
52 #include <sys/file.h> /* XXX */
53 #include <sys/proc.h>
54 #include <sys/priv.h>
55 #include <sys/jail.h>
56 #include <sys/sysctl.h>
57 #include <sys/sfbuf.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page2.h>
#ifdef LWBUF_IS_OPTIMAL

/*
 * Run-time switch for vop_helper_read_shortcut().  Defaults to 1 and is
 * exported read-write as "read_shortcut_enable" under the _vm sysctl
 * node.  When it is 0 the shortcut returns immediately without touching
 * the uio, leaving the whole read to the caller.
 */
static int vm_read_shortcut_enable = 1;
SYSCTL_INT(_vm, OID_AUTO, read_shortcut_enable, CTLFLAG_RW,
	  &vm_read_shortcut_enable, 0, "Direct vm_object vop_read shortcut");

#endif
71 * vop_helper_access()
73 * Provide standard UNIX semanics for VOP_ACCESS, but without the quota
74 * code. This procedure was basically pulled out of UFS.
76 int
77 vop_helper_access(struct vop_access_args *ap, uid_t ino_uid, gid_t ino_gid,
78 mode_t ino_mode, u_int32_t ino_flags)
80 struct vnode *vp = ap->a_vp;
81 struct ucred *cred = ap->a_cred;
82 mode_t mask, mode = ap->a_mode;
83 gid_t *gp;
84 int i;
85 uid_t proc_uid;
86 gid_t proc_gid;
88 if (ap->a_flags & AT_EACCESS) {
89 proc_uid = cred->cr_uid;
90 proc_gid = cred->cr_gid;
91 } else {
92 proc_uid = cred->cr_ruid;
93 proc_gid = cred->cr_rgid;
97 * Disallow write attempts on read-only filesystems;
98 * unless the file is a socket, fifo, or a block or
99 * character device resident on the filesystem.
101 if (mode & VWRITE) {
102 switch (vp->v_type) {
103 case VDIR:
104 case VLNK:
105 case VREG:
106 case VDATABASE:
107 if (vp->v_mount->mnt_flag & MNT_RDONLY)
108 return (EROFS);
109 break;
110 default:
111 break;
115 /* If immutable bit set, nobody gets to write it. */
116 if ((mode & VWRITE) && (ino_flags & IMMUTABLE))
117 return (EPERM);
119 /* Otherwise, user id 0 always gets access. */
120 if (proc_uid == 0)
121 return (0);
123 mask = 0;
125 /* Otherwise, check the owner. */
126 if (proc_uid == ino_uid) {
127 if (mode & VEXEC)
128 mask |= S_IXUSR;
129 if (mode & VREAD)
130 mask |= S_IRUSR;
131 if (mode & VWRITE)
132 mask |= S_IWUSR;
133 return ((ino_mode & mask) == mask ? 0 : EACCES);
137 * Otherwise, check the groups.
138 * We must special-case the primary group to, if needed, check against
139 * the real gid and not the effective one.
141 if (proc_gid == ino_gid) {
142 if (mode & VEXEC)
143 mask |= S_IXGRP;
144 if (mode & VREAD)
145 mask |= S_IRGRP;
146 if (mode & VWRITE)
147 mask |= S_IWGRP;
148 return ((ino_mode & mask) == mask ? 0 : EACCES);
150 for (i = 1, gp = &cred->cr_groups[1]; i < cred->cr_ngroups; i++, gp++)
151 if (ino_gid == *gp) {
152 if (mode & VEXEC)
153 mask |= S_IXGRP;
154 if (mode & VREAD)
155 mask |= S_IRGRP;
156 if (mode & VWRITE)
157 mask |= S_IWGRP;
158 return ((ino_mode & mask) == mask ? 0 : EACCES);
161 /* Otherwise, check everyone else. */
162 if (mode & VEXEC)
163 mask |= S_IXOTH;
164 if (mode & VREAD)
165 mask |= S_IROTH;
166 if (mode & VWRITE)
167 mask |= S_IWOTH;
168 return ((ino_mode & mask) == mask ? 0 : EACCES);
172 vop_helper_setattr_flags(u_int32_t *ino_flags, u_int32_t vaflags,
173 uid_t uid, struct ucred *cred)
175 int error;
178 * If uid doesn't match only a privileged user can change the flags
180 if (cred->cr_uid != uid &&
181 (error = priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0))) {
182 return(error);
184 if (cred->cr_uid == 0 &&
185 (!jailed(cred)|| jail_chflags_allowed)) {
186 if ((*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND)) &&
187 securelevel > 0)
188 return (EPERM);
189 *ino_flags = vaflags;
190 } else {
191 if (*ino_flags & (SF_NOUNLINK|SF_IMMUTABLE|SF_APPEND) ||
192 (vaflags & UF_SETTABLE) != vaflags)
193 return (EPERM);
194 *ino_flags &= SF_SETTABLE;
195 *ino_flags |= vaflags & UF_SETTABLE;
197 return(0);
201 * This helper function may be used by VFSs to implement UNIX initial
202 * ownership semantics when creating new objects inside directories.
204 uid_t
205 vop_helper_create_uid(struct mount *mp, mode_t dmode, uid_t duid,
206 struct ucred *cred, mode_t *modep)
208 #ifdef SUIDDIR
209 if ((mp->mnt_flag & MNT_SUIDDIR) && (dmode & S_ISUID) &&
210 duid != cred->cr_uid && duid) {
211 *modep &= ~07111;
212 return(duid);
214 #endif
215 return(cred->cr_uid);
219 * This helper may be used by VFSs to implement unix chmod semantics.
222 vop_helper_chmod(struct vnode *vp, mode_t new_mode, struct ucred *cred,
223 uid_t cur_uid, gid_t cur_gid, mode_t *cur_modep)
225 int error;
227 if (cred->cr_uid != cur_uid) {
228 error = priv_check_cred(cred, PRIV_VFS_CHMOD, 0);
229 if (error)
230 return (error);
232 if (cred->cr_uid) {
233 if (vp->v_type != VDIR && (*cur_modep & S_ISTXT))
234 return (EFTYPE);
235 if (!groupmember(cur_gid, cred) && (*cur_modep & S_ISGID))
236 return (EPERM);
238 *cur_modep &= ~ALLPERMS;
239 *cur_modep |= new_mode & ALLPERMS;
240 return(0);
244 * This helper may be used by VFSs to implement unix chown semantics.
247 vop_helper_chown(struct vnode *vp, uid_t new_uid, gid_t new_gid,
248 struct ucred *cred,
249 uid_t *cur_uidp, gid_t *cur_gidp, mode_t *cur_modep)
251 gid_t ogid;
252 uid_t ouid;
253 int error;
255 if (new_uid == (uid_t)VNOVAL)
256 new_uid = *cur_uidp;
257 if (new_gid == (gid_t)VNOVAL)
258 new_gid = *cur_gidp;
261 * If we don't own the file, are trying to change the owner
262 * of the file, or are not a member of the target group,
263 * the caller must be privileged or the call fails.
265 if ((cred->cr_uid != *cur_uidp || new_uid != *cur_uidp ||
266 (new_gid != *cur_gidp && !(cred->cr_gid == new_gid ||
267 groupmember(new_gid, cred)))) &&
268 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) {
269 return (error);
271 ogid = *cur_gidp;
272 ouid = *cur_uidp;
273 /* XXX QUOTA CODE */
274 *cur_uidp = new_uid;
275 *cur_gidp = new_gid;
276 /* XXX QUOTA CODE */
279 * DragonFly clears both SUID and SGID if either the owner or
280 * group is changed and root isn't doing it. If root is doing
281 * it we do not clear SUID/SGID.
283 if (cred->cr_uid != 0 && (ouid != new_uid || ogid != new_gid))
284 *cur_modep &= ~(S_ISUID | S_ISGID);
285 return(0);
288 #ifdef LWBUF_IS_OPTIMAL
291 * A VFS can call this function to try to dispose of a read request
292 * directly from the VM system, pretty much bypassing almost all VFS
293 * overhead except for atime updates.
295 * If 0 is returned some or all of the uio was handled. The caller must
296 * check the uio and handle the remainder.
298 * The caller must fail on a non-zero error.
301 vop_helper_read_shortcut(struct vop_read_args *ap)
303 struct vnode *vp;
304 struct uio *uio;
305 struct lwbuf *lwb;
306 struct lwbuf lwb_cache;
307 vm_object_t obj;
308 vm_page_t m;
309 int offset;
310 int n;
311 int error;
313 vp = ap->a_vp;
314 uio = ap->a_uio;
317 * We can't short-cut if there is no VM object or this is a special
318 * UIO_NOCOPY read (typically from VOP_STRATEGY()). We also can't
319 * do this if we cannot extract the filesize from the vnode.
321 if (vm_read_shortcut_enable == 0)
322 return(0);
323 if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY)
324 return(0);
325 if (vp->v_filesize == NOOFFSET)
326 return(0);
327 if (uio->uio_resid == 0)
328 return(0);
331 * Iterate the uio on a page-by-page basis
333 * XXX can we leave the object held shared during the uiomove()?
335 obj = vp->v_object;
336 vm_object_hold_shared(obj);
338 error = 0;
339 while (uio->uio_resid && error == 0) {
340 offset = (int)uio->uio_offset & PAGE_MASK;
341 n = PAGE_SIZE - offset;
342 if (n > uio->uio_resid)
343 n = uio->uio_resid;
344 if (vp->v_filesize < uio->uio_offset)
345 break;
346 if (uio->uio_offset + n > vp->v_filesize)
347 n = vp->v_filesize - uio->uio_offset;
348 if (n == 0)
349 break; /* hit EOF */
351 m = vm_page_lookup_sbusy_try(obj, OFF_TO_IDX(uio->uio_offset));
352 if (error || m == NULL) {
353 error = 0;
354 break;
356 if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
357 vm_page_sbusy_drop(m);
358 break;
360 lwb = lwbuf_alloc(m, &lwb_cache);
363 * Use a no-fault uiomove() to avoid deadlocking against
364 * our VM object (which could livelock on the same object
365 * due to shared-vs-exclusive), or deadlocking against
366 * our busied page. Returns EFAULT on any fault which
367 * winds up diving a vnode.
369 error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset,
370 n, uio);
372 vm_page_flag_set(m, PG_REFERENCED);
373 lwbuf_free(lwb);
374 vm_page_sbusy_drop(m);
376 vm_object_drop(obj);
379 * Ignore EFAULT since we used uiomove_nofault(), causes caller
380 * to fall-back to normal code for this case.
382 if (error == EFAULT)
383 error = 0;
385 return (error);
388 #else
/*
 * If lwbuf's aren't optimal then it's best to just use the buffer
 * cache.  This variant never touches the uio and always returns 0, so
 * the caller handles the entire read itself.
 */
int
vop_helper_read_shortcut(struct vop_read_args *ap)
{
	(void)ap;

	return (0);
}
400 #endif