1 #define MSNFS /* HACK HACK */
5 * File operations used by nfsd. Some of these have been ripped from
6 * other parts of the kernel because they weren't in ksyms.c, others
7 * are partial duplicates with added or changed functionality.
9 * Note that several functions dget() the dentry upon which they want
10 * to act, most notably those that create directory entries. Response
11 * dentry's are dput()'d if necessary in the release callback.
12 * So if you notice code paths that apparently fail to dput() the
13 * dentry, don't worry--they have been taken care of.
15 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
18 #include <linux/config.h>
19 #include <linux/version.h>
20 #include <linux/string.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
23 #include <linux/locks.h>
25 #include <linux/major.h>
26 #include <linux/ext2_fs.h>
27 #include <linux/proc_fs.h>
28 #include <linux/stat.h>
29 #include <linux/fcntl.h>
30 #include <linux/net.h>
31 #include <linux/unistd.h>
32 #include <linux/malloc.h>
34 #define __NO_VERSION__
35 #include <linux/module.h>
37 #include <linux/sunrpc/svc.h>
38 #include <linux/nfsd/nfsd.h>
40 #include <linux/nfs3.h>
41 #include <linux/nfsd/xdr3.h>
42 #endif /* CONFIG_NFSD_V3 */
43 #include <linux/nfsd/nfsfh.h>
44 #include <linux/quotaops.h>
46 #include <asm/uaccess.h>
48 #define NFSDDBG_FACILITY NFSDDBG_FILEOP
52 /* We must ignore files (but only files) which might have mandatory
53 * locks on them because there is no way to know if the accesser has
56 #define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
59 * This is a cache of readahead params that help us choose the proper
60 * readahead strategy. Initially, we set all readahead parameters to 0
61 * and let the VFS handle things.
62 * If you increase the number of cached files very much, you'll need to
63 * add a hash table here.
66 struct raparms
*p_next
;
70 unsigned long p_reada
,
77 static struct raparms
* raparml
;
78 static struct raparms
* raparm_cache
;
81 * Look up one component of a pathname.
82 * N.B. After this call _both_ fhp and resfh need an fh_put
84 * If the lookup would cross a mountpoint, and the mounted filesystem
85 * is exported to the client with NFSEXP_CROSSMNT, then the lookup is
86 * accepted as it stands and the mounted directory is
87 * returned. Otherwise the covered directory is returned.
88 * NOTE: this mountpoint crossing is not supported properly by all
89 * clients and is explicitly disallowed for NFSv3
90 * NeilBrown <neilb@cse.unsw.edu.au>
93 nfsd_lookup(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, const char *name
,
94 int len
, struct svc_fh
*resfh
)
96 struct svc_export
*exp
;
97 struct dentry
*dparent
;
98 struct dentry
*dentry
;
101 dprintk("nfsd: nfsd_lookup(fh %s, %s)\n", SVCFH_fmt(fhp
), name
);
103 /* Obtain dentry and export. */
104 err
= fh_verify(rqstp
, fhp
, S_IFDIR
, MAY_EXEC
);
108 dparent
= fhp
->fh_dentry
;
109 exp
= fhp
->fh_export
;
113 /* Lookup the name, but don't follow links */
114 if (strcmp(name
, ".")==0) {
115 dentry
= dget(dparent
);
116 } else if (strcmp(name
, "..")==0) {
117 /* checking mountpoint crossing is very different when stepping up */
118 if (dparent
== exp
->ex_dentry
) {
119 if (!EX_CROSSMNT(exp
))
120 dentry
= dget(dparent
); /* .. == . just like at / */
123 struct svc_export
*exp2
= NULL
;
125 struct vfsmount
*mnt
= mntget(exp
->ex_mnt
);
126 dentry
= dget(dparent
);
127 while(follow_up(&mnt
, &dentry
))
129 dp
= dget(dentry
->d_parent
);
132 for ( ; exp2
== NULL
&& dp
->d_parent
!= dp
;
134 exp2
= exp_get(exp
->ex_client
, dp
->d_inode
->i_dev
, dp
->d_inode
->i_ino
);
137 dentry
= dget(dparent
);
144 dentry
= dget(dparent
->d_parent
);
147 dentry
= lookup_one(name
, dparent
);
148 err
= PTR_ERR(dentry
);
152 * check if we have crossed a mount point ...
154 if (d_mountpoint(dentry
)) {
155 struct svc_export
*exp2
= NULL
;
156 struct vfsmount
*mnt
= mntget(exp
->ex_mnt
);
157 struct dentry
*mounts
= dget(dentry
);
158 while (follow_down(&mnt
,&mounts
)&&d_mountpoint(mounts
))
160 exp2
= exp_get(rqstp
->rq_client
,
161 mounts
->d_inode
->i_dev
,
162 mounts
->d_inode
->i_ino
);
163 if (exp2
&& EX_CROSSMNT(exp2
)) {
164 /* successfully crossed mount point */
174 * Note: we compose the file handle now, but as the
175 * dentry may be negative, it may need to be updated.
177 err
= fh_compose(resfh
, exp
, dentry
);
178 if (!err
&& !dentry
->d_inode
)
189 * Set various file attributes.
190 * N.B. After this call fhp needs an fh_put
193 nfsd_setattr(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, struct iattr
*iap
)
195 struct dentry
*dentry
;
197 int accmode
= MAY_SATTR
;
203 if (iap
->ia_valid
& (ATTR_ATIME
| ATTR_MTIME
| ATTR_SIZE
))
204 accmode
|= MAY_WRITE
|MAY_OWNER_OVERRIDE
;
205 if (iap
->ia_valid
& ATTR_SIZE
)
209 err
= fh_verify(rqstp
, fhp
, ftype
, accmode
);
210 if (err
|| !iap
->ia_valid
)
213 dentry
= fhp
->fh_dentry
;
214 inode
= dentry
->d_inode
;
216 err
= inode_change_ok(inode
, iap
);
217 /* could be a "touch" (utimes) request where the user is not the owner but does
218 * have write permission. In this case the user should be allowed to set
219 * both times to the current time. We could just assume any such SETATTR
220 * is intended to set the times to "now", but we do a couple of simple tests
221 * to increase our confidence.
223 #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
224 #define MAX_TOUCH_TIME_ERROR (30*60)
226 && (iap
->ia_valid
& BOTH_TIME_SET
) == BOTH_TIME_SET
227 && iap
->ia_mtime
== iap
->ia_ctime
229 /* looks good. now just make sure time is in the right ballpark.
230 * solaris, at least, doesn't seem to care what the time request is
232 time_t delta
= iap
->ia_atime
- CURRENT_TIME
;
233 if (delta
<0) delta
= -delta
;
234 if (delta
< MAX_TOUCH_TIME_ERROR
) {
235 /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
236 * this will cause notify_change to set these times to "now"
238 iap
->ia_valid
&= ~BOTH_TIME_SET
;
239 err
= inode_change_ok(inode
, iap
);
246 /* The size case is special. It changes the file as well as the attributes. */
247 if (iap
->ia_valid
& ATTR_SIZE
) {
248 if (iap
->ia_size
< inode
->i_size
) {
249 err
= nfsd_permission(fhp
->fh_export
, dentry
, MAY_TRUNC
|MAY_OWNER_OVERRIDE
);
255 * If we are changing the size of the file, then
256 * we need to break all leases.
258 err
= get_lease(inode
, FMODE_WRITE
);
262 err
= get_write_access(inode
);
266 err
= locks_verify_truncate(inode
, NULL
, iap
->ia_size
);
268 put_write_access(inode
);
274 imode
= inode
->i_mode
;
275 if (iap
->ia_valid
& ATTR_MODE
) {
276 iap
->ia_mode
&= S_IALLUGO
;
277 imode
= iap
->ia_mode
|= (imode
& ~S_IALLUGO
);
280 /* Revoke setuid/setgid bit on chown/chgrp */
281 if ((iap
->ia_valid
& ATTR_UID
) && (imode
& S_ISUID
)
282 && iap
->ia_uid
!= inode
->i_uid
) {
283 iap
->ia_valid
|= ATTR_MODE
;
284 iap
->ia_mode
= imode
&= ~S_ISUID
;
286 if ((iap
->ia_valid
& ATTR_GID
) && (imode
& S_ISGID
)
287 && iap
->ia_gid
!= inode
->i_gid
) {
288 iap
->ia_valid
|= ATTR_MODE
;
289 iap
->ia_mode
= imode
&= ~S_ISGID
;
292 /* Change the attributes. */
295 iap
->ia_valid
|= ATTR_CTIME
;
297 /* DQUOT_TRANSFER needs both ia_uid and ia_gid defined */
298 if (iap
->ia_valid
& (ATTR_UID
|ATTR_GID
)) {
299 if (! (iap
->ia_valid
& ATTR_UID
))
300 iap
->ia_uid
= inode
->i_uid
;
301 if (! (iap
->ia_valid
& ATTR_GID
))
302 iap
->ia_gid
= inode
->i_gid
;
303 iap
->ia_valid
|= ATTR_UID
|ATTR_GID
;
305 #endif /* CONFIG_QUOTA */
307 if (iap
->ia_valid
& ATTR_SIZE
) {
312 if (iap
->ia_valid
& (ATTR_UID
|ATTR_GID
))
313 err
= DQUOT_TRANSFER(dentry
, iap
);
316 err
= notify_change(dentry
, iap
);
319 put_write_access(inode
);
323 if (EX_ISSYNC(fhp
->fh_export
))
324 write_inode_now(inode
, 1);
334 #ifdef CONFIG_NFSD_V3
336 * Check server access rights to a file system object
342 static struct accessmap nfs3_regaccess
[] = {
343 { NFS3_ACCESS_READ
, MAY_READ
},
344 { NFS3_ACCESS_EXECUTE
, MAY_EXEC
},
345 { NFS3_ACCESS_MODIFY
, MAY_WRITE
|MAY_TRUNC
},
346 { NFS3_ACCESS_EXTEND
, MAY_WRITE
},
351 static struct accessmap nfs3_diraccess
[] = {
352 { NFS3_ACCESS_READ
, MAY_READ
},
353 { NFS3_ACCESS_LOOKUP
, MAY_EXEC
},
354 { NFS3_ACCESS_MODIFY
, MAY_EXEC
|MAY_WRITE
|MAY_TRUNC
},
355 { NFS3_ACCESS_EXTEND
, MAY_EXEC
|MAY_WRITE
},
356 { NFS3_ACCESS_DELETE
, MAY_REMOVE
},
361 static struct accessmap nfs3_anyaccess
[] = {
362 /* XXX: should we try to cover read/write here for clients that
363 * rely on us to do their access checking for special files? */
369 nfsd_access(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, u32
*access
)
371 struct accessmap
*map
;
372 struct svc_export
*export
;
373 struct dentry
*dentry
;
374 u32 query
, result
= 0;
377 error
= fh_verify(rqstp
, fhp
, 0, MAY_NOP
);
381 export
= fhp
->fh_export
;
382 dentry
= fhp
->fh_dentry
;
384 if (S_ISREG(dentry
->d_inode
->i_mode
))
385 map
= nfs3_regaccess
;
386 else if (S_ISDIR(dentry
->d_inode
->i_mode
))
387 map
= nfs3_diraccess
;
389 map
= nfs3_anyaccess
;
393 for (; map
->access
; map
++) {
394 if (map
->access
& query
) {
396 err2
= nfsd_permission(export
, dentry
, map
->how
);
399 result
|= map
->access
;
402 /* the following error codes just mean the access was not allowed,
403 * rather than an error occurred */
407 /* simply don't "or" in the access bit. */
420 #endif /* CONFIG_NFSD_V3 */
425 * Open an existing file or directory.
426 * The access argument indicates the type of open (read/write/lock)
427 * N.B. After this call fhp needs an fh_put
430 nfsd_open(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, int type
,
431 int access
, struct file
*filp
)
433 struct dentry
*dentry
;
437 /* If we get here, then the client has already done an "open", and (hopefully)
438 * checked permission - so allow OWNER_OVERRIDE in case a chmod has now revoked
440 err
= fh_verify(rqstp
, fhp
, type
, access
| MAY_OWNER_OVERRIDE
);
444 dentry
= fhp
->fh_dentry
;
445 inode
= dentry
->d_inode
;
447 /* Disallow access to files with the append-only bit set or
448 * with mandatory locking enabled
451 if (IS_APPEND(inode
) || IS_ISMNDLK(inode
))
457 * Check to see if there are any leases on this file.
458 * This may block while leases are broken.
460 err
= get_lease(inode
, (access
& MAY_WRITE
) ? FMODE_WRITE
: 0);
464 if ((access
& MAY_WRITE
) && (err
= get_write_access(inode
)) != 0)
467 memset(filp
, 0, sizeof(*filp
));
468 filp
->f_op
= fops_get(inode
->i_fop
);
469 atomic_set(&filp
->f_count
, 1);
470 filp
->f_dentry
= dentry
;
471 if (access
& MAY_WRITE
) {
472 filp
->f_flags
= O_WRONLY
|O_LARGEFILE
;
473 filp
->f_mode
= FMODE_WRITE
;
476 filp
->f_flags
= O_RDONLY
|O_LARGEFILE
;
477 filp
->f_mode
= FMODE_READ
;
481 if (filp
->f_op
&& filp
->f_op
->open
) {
482 err
= filp
->f_op
->open(inode
, filp
);
484 fops_put(filp
->f_op
);
485 if (access
& MAY_WRITE
)
486 put_write_access(inode
);
488 /* I nearly added put_filp() call here, but this filp
489 * is really on callers stack frame. -DaveM
491 atomic_dec(&filp
->f_count
);
505 nfsd_close(struct file
*filp
)
507 struct dentry
*dentry
= filp
->f_dentry
;
508 struct inode
*inode
= dentry
->d_inode
;
510 if (filp
->f_op
&& filp
->f_op
->release
)
511 filp
->f_op
->release(inode
, filp
);
512 fops_put(filp
->f_op
);
513 if (filp
->f_mode
& FMODE_WRITE
)
514 put_write_access(inode
);
519 * As this calls fsync (not fdatasync) there is no need for a write_inode
523 nfsd_sync(struct file
*filp
)
525 dprintk("nfsd: sync file %s\n", filp
->f_dentry
->d_name
.name
);
526 down(&filp
->f_dentry
->d_inode
->i_sem
);
527 filp
->f_op
->fsync(filp
, filp
->f_dentry
, 0);
528 up(&filp
->f_dentry
->d_inode
->i_sem
);
532 nfsd_sync_dir(struct dentry
*dp
)
534 struct inode
*inode
= dp
->d_inode
;
535 int (*fsync
) (struct file
*, struct dentry
*, int);
537 if (inode
->i_fop
&& (fsync
= inode
->i_fop
->fsync
)) {
543 * Obtain the readahead parameters for the file
544 * specified by (dev, ino).
546 static inline struct raparms
*
547 nfsd_get_raparms(dev_t dev
, ino_t ino
)
549 struct raparms
*ra
, **rap
, **frap
= NULL
;
552 for (rap
= &raparm_cache
; (ra
= *rap
); rap
= &ra
->p_next
) {
553 if (ra
->p_ino
== ino
&& ra
->p_dev
== dev
)
556 if (ra
->p_count
== 0)
559 depth
= nfsdstats
.ra_size
*11/10;
564 memset(ra
, 0, sizeof(*ra
));
568 if (rap
!= &raparm_cache
) {
570 ra
->p_next
= raparm_cache
;
574 nfsdstats
.ra_depth
[depth
*10/nfsdstats
.ra_size
]++;
579 * Read data from a file. count must contain the requested read count
580 * on entry. On return, *count contains the number of bytes actually read.
581 * N.B. After this call fhp needs an fh_put
584 nfsd_read(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, loff_t offset
,
585 char *buf
, unsigned long *count
)
592 err
= nfsd_open(rqstp
, fhp
, S_IFREG
, MAY_READ
, &file
);
596 if (!file
.f_op
->read
)
599 if ((fhp
->fh_export
->ex_flags
& NFSEXP_MSNFS
) &&
600 (!lock_may_read(file
.f_dentry
->d_inode
, offset
, *count
)))
604 /* Get readahead parameters */
605 ra
= nfsd_get_raparms(fhp
->fh_export
->ex_dev
, fhp
->fh_dentry
->d_inode
->i_ino
);
607 file
.f_reada
= ra
->p_reada
;
608 file
.f_ramax
= ra
->p_ramax
;
609 file
.f_raend
= ra
->p_raend
;
610 file
.f_ralen
= ra
->p_ralen
;
611 file
.f_rawin
= ra
->p_rawin
;
615 oldfs
= get_fs(); set_fs(KERNEL_DS
);
616 err
= file
.f_op
->read(&file
, buf
, *count
, &file
.f_pos
);
619 /* Write back readahead params */
621 dprintk("nfsd: raparms %ld %ld %ld %ld %ld\n",
622 file
.f_reada
, file
.f_ramax
, file
.f_raend
,
623 file
.f_ralen
, file
.f_rawin
);
624 ra
->p_reada
= file
.f_reada
;
625 ra
->p_ramax
= file
.f_ramax
;
626 ra
->p_raend
= file
.f_raend
;
627 ra
->p_ralen
= file
.f_ralen
;
628 ra
->p_rawin
= file
.f_rawin
;
633 nfsdstats
.io_read
+= err
;
645 * Write data to a file.
646 * The stable flag requests synchronous writes.
647 * N.B. After this call fhp needs an fh_put
650 nfsd_write(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, loff_t offset
,
651 char *buf
, unsigned long cnt
, int *stablep
)
653 struct svc_export
*exp
;
655 struct dentry
*dentry
;
659 int stable
= *stablep
;
661 err
= nfsd_open(rqstp
, fhp
, S_IFREG
, MAY_WRITE
, &file
);
667 if (!file
.f_op
->write
)
670 if ((fhp
->fh_export
->ex_flags
& NFSEXP_MSNFS
) &&
671 (!lock_may_write(file
.f_dentry
->d_inode
, offset
, cnt
)))
675 dentry
= file
.f_dentry
;
676 inode
= dentry
->d_inode
;
677 exp
= fhp
->fh_export
;
680 * Request sync writes if
681 * - the sync export option has been set, or
682 * - the client requested O_SYNC behavior (NFSv3 feature).
683 * - The file system doesn't support fsync().
684 * When gathered writes have been configured for this volume,
685 * flushing the data to disk is handled separately below.
688 if (file
.f_op
->fsync
== 0) {/* COMMIT3 cannot work */
690 *stablep
= 2; /* FILE_SYNC */
695 if (stable
&& !EX_WGATHER(exp
))
696 file
.f_flags
|= O_SYNC
;
698 file
.f_pos
= offset
; /* set write offset */
700 /* Write the data. */
701 oldfs
= get_fs(); set_fs(KERNEL_DS
);
702 err
= file
.f_op
->write(&file
, buf
, cnt
, &file
.f_pos
);
704 nfsdstats
.io_write
+= cnt
;
707 /* clear setuid/setgid flag after write */
708 if (err
>= 0 && (inode
->i_mode
& (S_ISUID
| S_ISGID
))) {
711 ia
.ia_valid
= ATTR_MODE
;
712 ia
.ia_mode
= inode
->i_mode
& ~(S_ISUID
| S_ISGID
);
713 notify_change(dentry
, &ia
);
716 if (err
>= 0 && stable
) {
717 static unsigned long last_ino
;
718 static kdev_t last_dev
= NODEV
;
721 * Gathered writes: If another process is currently
722 * writing to the file, there's a high chance
723 * this is another nfsd (triggered by a bulk write
724 * from a client's biod). Rather than syncing the
725 * file with each write request, we sleep for 10 msec.
727 * I don't know if this roughly approximates
728 * C. Juszak's idea of gathered writes, but it's a
729 * nice and simple solution (IMHO), and it seems to
732 if (EX_WGATHER(exp
) && (atomic_read(&inode
->i_writecount
) > 1
733 || (last_ino
== inode
->i_ino
&& last_dev
== inode
->i_dev
))) {
735 interruptible_sleep_on_timeout(&inode
->i_wait
, 10 * HZ
/ 1000);
737 dprintk("nfsd: write defer %d\n", current
->pid
);
738 /* FIXME: Olaf commented this out [gam3] */
739 set_current_state(TASK_UNINTERRUPTIBLE
);
740 schedule_timeout((HZ
+99)/100);
741 current
->state
= TASK_RUNNING
;
742 dprintk("nfsd: write resume %d\n", current
->pid
);
746 if (inode
->i_state
& I_DIRTY
) {
747 dprintk("nfsd: write sync %d\n", current
->pid
);
751 wake_up(&inode
->i_wait
);
753 last_ino
= inode
->i_ino
;
754 last_dev
= inode
->i_dev
;
757 dprintk("nfsd: write complete err=%d\n", err
);
769 #ifdef CONFIG_NFSD_V3
771 * Commit all pending writes to stable storage.
772 * Strictly speaking, we could sync just the indicated file region here,
773 * but there's currently no way we can ask the VFS to do so.
775 * Unfortunately we cannot lock the file to make sure we return full WCC
776 * data to the client, as locking happens lower down in the filesystem.
779 nfsd_commit(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
,
780 off_t offset
, unsigned long count
)
785 if ((err
= nfsd_open(rqstp
, fhp
, S_IFREG
, MAY_WRITE
, &file
)) != 0)
787 if (EX_ISSYNC(fhp
->fh_export
)) {
788 if (file
.f_op
&& file
.f_op
->fsync
) {
791 err
= nfserr_notsupp
;
798 #endif /* CONFIG_NFSD_V3 */
801 * Create a file (regular, directory, device, fifo); UNIX sockets
802 * not yet implemented.
803 * If the response fh has been verified, the parent directory should
804 * already be locked. Note that the parent directory is left locked.
806 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
809 nfsd_create(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
,
810 char *fname
, int flen
, struct iattr
*iap
,
811 int type
, dev_t rdev
, struct svc_fh
*resfhp
)
813 struct dentry
*dentry
, *dchild
;
821 if (isdotent(fname
, flen
))
824 err
= fh_verify(rqstp
, fhp
, S_IFDIR
, MAY_CREATE
);
828 dentry
= fhp
->fh_dentry
;
829 dirp
= dentry
->d_inode
;
832 if(!dirp
->i_op
|| !dirp
->i_op
->lookup
)
835 * Check whether the response file handle has been verified yet.
836 * If it has, the parent directory should already be locked.
838 if (!resfhp
->fh_dentry
) {
839 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
841 dchild
= lookup_one(fname
, dentry
);
842 err
= PTR_ERR(dchild
);
845 err
= fh_compose(resfhp
, fhp
->fh_export
, dchild
);
849 /* called from nfsd_proc_create */
850 dchild
= resfhp
->fh_dentry
;
851 if (!fhp
->fh_locked
) {
852 /* not actually possible */
854 "nfsd_create: parent %s/%s not locked!\n",
855 dentry
->d_parent
->d_name
.name
,
856 dentry
->d_name
.name
);
862 * Make sure the child dentry is still negative ...
865 if (dchild
->d_inode
) {
866 dprintk("nfsd_create: dentry %s/%s not negative!\n",
867 dentry
->d_name
.name
, dchild
->d_name
.name
);
871 if (!(iap
->ia_valid
& ATTR_MODE
))
873 iap
->ia_mode
= (iap
->ia_mode
& S_IALLUGO
) | type
;
876 * Get the dir op function pointer.
881 err
= vfs_create(dirp
, dchild
, iap
->ia_mode
);
884 err
= vfs_mkdir(dirp
, dchild
, iap
->ia_mode
);
890 err
= vfs_mknod(dirp
, dchild
, iap
->ia_mode
, rdev
);
893 printk("nfsd: bad file type %o in nfsd_create\n", type
);
899 if (EX_ISSYNC(fhp
->fh_export
)) {
900 nfsd_sync_dir(dentry
);
901 write_inode_now(dchild
->d_inode
, 1);
905 /* Set file attributes. Mode has already been set and
906 * setting uid/gid works only for root. Irix appears to
907 * send along the gid when it tries to implement setgid
908 * directories via NFS.
911 if ((iap
->ia_valid
&= ~(ATTR_UID
|ATTR_GID
|ATTR_MODE
)) != 0)
912 err
= nfsd_setattr(rqstp
, resfhp
, iap
);
914 * Update the file handle to get the new inode info.
917 err
= fh_update(resfhp
);
926 #ifdef CONFIG_NFSD_V3
928 * NFSv3 version of nfsd_create
931 nfsd_create_v3(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
,
932 char *fname
, int flen
, struct iattr
*iap
,
933 struct svc_fh
*resfhp
, int createmode
, u32
*verifier
)
935 struct dentry
*dentry
, *dchild
;
938 __u32 v_mtime
=0, v_atime
=0;
945 if (isdotent(fname
, flen
))
947 if (!(iap
->ia_valid
& ATTR_MODE
))
949 err
= fh_verify(rqstp
, fhp
, S_IFDIR
, MAY_CREATE
);
953 dentry
= fhp
->fh_dentry
;
954 dirp
= dentry
->d_inode
;
956 /* Get all the sanity checks out of the way before
957 * we lock the parent. */
959 if(!dirp
->i_op
|| !dirp
->i_op
->lookup
)
964 * Compose the response file handle.
966 dchild
= lookup_one(fname
, dentry
);
967 err
= PTR_ERR(dchild
);
971 err
= fh_compose(resfhp
, fhp
->fh_export
, dchild
);
975 if (createmode
== NFS3_CREATE_EXCLUSIVE
) {
976 /* while the verifier would fit in mtime+atime,
977 * solaris7 gets confused (bugid 4218508) if these have
978 * the high bit set, so we use the mode as well
980 v_mtime
= verifier
[0]&0x7fffffff;
981 v_atime
= verifier
[1]&0x7fffffff;
983 | ((verifier
[0]&0x80000000) >> (32-7)) /* u+x */
984 | ((verifier
[1]&0x80000000) >> (32-9)) /* u+r */
988 if (dchild
->d_inode
) {
991 switch (createmode
) {
992 case NFS3_CREATE_UNCHECKED
:
993 if (! S_ISREG(dchild
->d_inode
->i_mode
))
996 iap
->ia_valid
&= ATTR_SIZE
;
1000 case NFS3_CREATE_EXCLUSIVE
:
1001 if ( dchild
->d_inode
->i_mtime
== v_mtime
1002 && dchild
->d_inode
->i_atime
== v_atime
1003 && dchild
->d_inode
->i_mode
== v_mode
1004 && dchild
->d_inode
->i_size
== 0 )
1007 case NFS3_CREATE_GUARDED
:
1013 err
= vfs_create(dirp
, dchild
, iap
->ia_mode
);
1017 if (EX_ISSYNC(fhp
->fh_export
)) {
1018 nfsd_sync_dir(dentry
);
1019 /* setattr will sync the child (or not) */
1023 * Update the filehandle to get the new inode info.
1025 err
= fh_update(resfhp
);
1029 if (createmode
== NFS3_CREATE_EXCLUSIVE
) {
1030 /* Cram the verifier into atime/mtime/mode */
1031 iap
->ia_valid
= ATTR_MTIME
|ATTR_ATIME
1032 | ATTR_MTIME_SET
|ATTR_ATIME_SET
1034 iap
->ia_mtime
= v_mtime
;
1035 iap
->ia_atime
= v_atime
;
1036 iap
->ia_mode
= v_mode
;
1039 /* Set file attributes.
1040 * Mode has already been set but we might need to reset it
1041 * for CREATE_EXCLUSIVE
1042 * Irix appears to send along the gid when it tries to
1043 * implement setgid directories via NFS. Clear out all that cruft.
1046 if ((iap
->ia_valid
&= ~(ATTR_UID
|ATTR_GID
)) != 0)
1047 err
= nfsd_setattr(rqstp
, resfhp
, iap
);
1054 err
= nfserrno(err
);
1057 #endif /* CONFIG_NFSD_V3 */
1060 * Read a symlink. On entry, *lenp must contain the maximum path length that
1061 * fits into the buffer. On return, it contains the true length.
1062 * N.B. After this call fhp needs an fh_put
1065 nfsd_readlink(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, char *buf
, int *lenp
)
1067 struct dentry
*dentry
;
1068 struct inode
*inode
;
1072 err
= fh_verify(rqstp
, fhp
, S_IFLNK
, MAY_NOP
);
1076 dentry
= fhp
->fh_dentry
;
1077 inode
= dentry
->d_inode
;
1080 if (!inode
->i_op
|| !inode
->i_op
->readlink
)
1083 UPDATE_ATIME(inode
);
1084 /* N.B. Why does this call need a get_fs()??
1085 * Remove the set_fs and watch the fireworks:-) --okir
1088 oldfs
= get_fs(); set_fs(KERNEL_DS
);
1089 err
= inode
->i_op
->readlink(dentry
, buf
, *lenp
);
1100 err
= nfserrno(err
);
1105 * Create a symlink and look up its inode
1106 * N.B. After this call _both_ fhp and resfhp need an fh_put
1109 nfsd_symlink(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
,
1110 char *fname
, int flen
,
1111 char *path
, int plen
,
1112 struct svc_fh
*resfhp
,
1115 struct dentry
*dentry
, *dnew
;
1122 if (isdotent(fname
, flen
))
1125 err
= fh_verify(rqstp
, fhp
, S_IFDIR
, MAY_CREATE
);
1129 dentry
= fhp
->fh_dentry
;
1130 dnew
= lookup_one(fname
, dentry
);
1131 err
= PTR_ERR(dnew
);
1135 err
= vfs_symlink(dentry
->d_inode
, dnew
, path
);
1137 if (EX_ISSYNC(fhp
->fh_export
))
1138 nfsd_sync_dir(dentry
);
1140 iap
->ia_valid
&= ATTR_MODE
/* ~(ATTR_MODE|ATTR_UID|ATTR_GID)*/;
1141 if (iap
->ia_valid
) {
1142 iap
->ia_valid
|= ATTR_CTIME
;
1143 iap
->ia_mode
= (iap
->ia_mode
&S_IALLUGO
)
1145 err
= notify_change(dnew
, iap
);
1146 if (!err
&& EX_ISSYNC(fhp
->fh_export
))
1147 write_inode_now(dentry
->d_inode
, 1);
1151 err
= nfserrno(err
);
1154 /* Compose the fh so the dentry will be freed ... */
1155 cerr
= fh_compose(resfhp
, fhp
->fh_export
, dnew
);
1156 if (err
==0) err
= cerr
;
1161 err
= nfserrno(err
);
1167 * N.B. After this call _both_ ffhp and tfhp need an fh_put
1170 nfsd_link(struct svc_rqst
*rqstp
, struct svc_fh
*ffhp
,
1171 char *fname
, int len
, struct svc_fh
*tfhp
)
1173 struct dentry
*ddir
, *dnew
, *dold
;
1174 struct inode
*dirp
, *dest
;
1177 err
= fh_verify(rqstp
, ffhp
, S_IFDIR
, MAY_CREATE
);
1180 err
= fh_verify(rqstp
, tfhp
, -S_IFDIR
, MAY_NOP
);
1188 if (isdotent(fname
, len
))
1192 ddir
= ffhp
->fh_dentry
;
1193 dirp
= ddir
->d_inode
;
1195 dnew
= lookup_one(fname
, ddir
);
1196 err
= PTR_ERR(dnew
);
1200 dold
= tfhp
->fh_dentry
;
1201 dest
= dold
->d_inode
;
1203 err
= vfs_link(dold
, dirp
, dnew
);
1205 if (EX_ISSYNC(ffhp
->fh_export
)) {
1206 nfsd_sync_dir(ddir
);
1207 write_inode_now(dest
, 1);
1210 if (err
== -EXDEV
&& rqstp
->rq_vers
== 2)
1213 err
= nfserrno(err
);
1222 err
= nfserrno(err
);
1228 * N.B. After this call _both_ ffhp and tfhp need an fh_put
1231 nfsd_rename(struct svc_rqst
*rqstp
, struct svc_fh
*ffhp
, char *fname
, int flen
,
1232 struct svc_fh
*tfhp
, char *tname
, int tlen
)
1234 struct dentry
*fdentry
, *tdentry
, *odentry
, *ndentry
;
1235 struct inode
*fdir
, *tdir
;
1238 err
= fh_verify(rqstp
, ffhp
, S_IFDIR
, MAY_REMOVE
);
1241 err
= fh_verify(rqstp
, tfhp
, S_IFDIR
, MAY_CREATE
);
1245 fdentry
= ffhp
->fh_dentry
;
1246 fdir
= fdentry
->d_inode
;
1248 tdentry
= tfhp
->fh_dentry
;
1249 tdir
= tdentry
->d_inode
;
1251 err
= (rqstp
->rq_vers
== 2) ? nfserr_acces
: nfserr_xdev
;
1252 if (fdir
->i_dev
!= tdir
->i_dev
)
1256 if (!flen
|| isdotent(fname
, flen
) || !tlen
|| isdotent(tname
, tlen
))
1259 /* cannot use fh_lock as we need deadlock protective ordering
1260 * so do it by hand */
1261 double_down(&tdir
->i_sem
, &fdir
->i_sem
);
1262 ffhp
->fh_locked
= tfhp
->fh_locked
= 1;
1266 odentry
= lookup_one(fname
, fdentry
);
1267 err
= PTR_ERR(odentry
);
1268 if (IS_ERR(odentry
))
1272 if (!odentry
->d_inode
)
1275 ndentry
= lookup_one(tname
, tdentry
);
1276 err
= PTR_ERR(ndentry
);
1277 if (IS_ERR(ndentry
))
1282 if ((ffhp
->fh_export
->ex_flags
& NFSEXP_MSNFS
) &&
1283 ((atomic_read(&odentry
->d_count
) > 1)
1284 || (atomic_read(&ndentry
->d_count
) > 1))) {
1288 err
= vfs_rename(fdir
, odentry
, tdir
, ndentry
);
1289 if (!err
&& EX_ISSYNC(tfhp
->fh_export
)) {
1290 nfsd_sync_dir(tdentry
);
1291 nfsd_sync_dir(fdentry
);
1299 err
= nfserrno(err
);
1301 /* we cannot reply on fh_unlock on the two filehandles,
1302 * as that would do the wrong thing if the two directories
1303 * were the same, so again we do it by hand
1305 fill_post_wcc(ffhp
);
1306 fill_post_wcc(tfhp
);
1307 double_up(&tdir
->i_sem
, &fdir
->i_sem
);
1308 ffhp
->fh_locked
= tfhp
->fh_locked
= 0;
1315 * Unlink a file or directory
1316 * N.B. After this call fhp needs an fh_put
1319 nfsd_unlink(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, int type
,
1320 char *fname
, int flen
)
1322 struct dentry
*dentry
, *rdentry
;
1327 if (!flen
|| isdotent(fname
, flen
))
1329 err
= fh_verify(rqstp
, fhp
, S_IFDIR
, MAY_REMOVE
);
1334 dentry
= fhp
->fh_dentry
;
1335 dirp
= dentry
->d_inode
;
1337 rdentry
= lookup_one(fname
, dentry
);
1338 err
= PTR_ERR(rdentry
);
1339 if (IS_ERR(rdentry
))
1342 if (!rdentry
->d_inode
) {
1348 if (type
!= S_IFDIR
) { /* It's UNLINK */
1350 if ((fhp
->fh_export
->ex_flags
& NFSEXP_MSNFS
) &&
1351 (atomic_read(&rdentry
->d_count
) > 1)) {
1355 err
= vfs_unlink(dirp
, rdentry
);
1356 } else { /* It's RMDIR */
1357 err
= vfs_rmdir(dirp
, rdentry
);
1364 if (EX_ISSYNC(fhp
->fh_export
))
1365 nfsd_sync_dir(dentry
);
1371 err
= nfserrno(err
);
1376 * Read entries from a directory.
1377 * The verifier is an NFSv3 thing we ignore for now.
1380 nfsd_readdir(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, loff_t offset
,
1381 encode_dent_fn func
, u32
*buffer
, int *countp
, u32
*verf
)
1383 struct inode
*inode
;
1385 int oldlen
, eof
, err
;
1387 struct readdir_cd cd
;
1389 err
= nfsd_open(rqstp
, fhp
, S_IFDIR
, MAY_READ
, &file
);
1392 if (offset
> ~(u32
) 0)
1395 err
= nfserr_notdir
;
1396 if (!file
.f_op
->readdir
)
1398 file
.f_pos
= offset
;
1400 /* Set up the readdir context */
1401 memset(&cd
, 0, sizeof(cd
));
1404 cd
.buflen
= *countp
; /* count of words */
1408 * Read the directory entries. This silly loop is necessary because
1409 * readdir() is not guaranteed to fill up the entire buffer, but
1410 * may choose to do less.
1412 inode
= file
.f_dentry
->d_inode
;
1413 down(&inode
->i_sem
);
1418 dprintk("nfsd: f_op->readdir(%x/%ld @ %d) buflen = %d (%d)\n",
1419 file.f_inode->i_dev, file.f_inode->i_ino,
1420 (int) file.f_pos, (int) oldlen, (int) cd.buflen);
1422 err
= file
.f_op
->readdir(&file
, &cd
, (filldir_t
) func
);
1425 if (oldlen
== cd
.buflen
)
1432 /* If we didn't fill the buffer completely, we're at EOF */
1436 if (rqstp
->rq_vers
== 3)
1437 (void)xdr_encode_hyper(cd
.offset
, file
.f_pos
);
1439 *cd
.offset
= htonl(file
.f_pos
);
1443 *p
++ = 0; /* no more entries */
1444 *p
++ = htonl(eof
); /* end of directory */
1445 *countp
= (caddr_t
) p
- (caddr_t
) buffer
;
1447 dprintk("nfsd: readdir result %d bytes, eof %d offset %d\n",
1449 cd
.offset
? ntohl(*cd
.offset
) : -1);
1458 err
= nfserrno(err
);
1463 * Get file system stats
1464 * N.B. After this call fhp needs an fh_put
1467 nfsd_statfs(struct svc_rqst
*rqstp
, struct svc_fh
*fhp
, struct statfs
*stat
)
1469 int err
= fh_verify(rqstp
, fhp
, 0, MAY_NOP
);
1470 if (!err
&& vfs_statfs(fhp
->fh_dentry
->d_inode
->i_sb
,stat
))
1476 * Check for a user's access permissions to this inode.
1479 nfsd_permission(struct svc_export
*exp
, struct dentry
*dentry
, int acc
)
1481 struct inode
*inode
= dentry
->d_inode
;
1487 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
1489 (acc
& MAY_READ
)? " read" : "",
1490 (acc
& MAY_WRITE
)? " write" : "",
1491 (acc
& MAY_EXEC
)? " exec" : "",
1492 (acc
& MAY_SATTR
)? " sattr" : "",
1493 (acc
& MAY_TRUNC
)? " trunc" : "",
1494 (acc
& MAY_LOCK
)? " lock" : "",
1495 (acc
& MAY_OWNER_OVERRIDE
)? " owneroverride" : "",
1497 IS_IMMUTABLE(inode
)? " immut" : "",
1498 IS_APPEND(inode
)? " append" : "",
1499 IS_RDONLY(inode
)? " ro" : "");
1500 dprintk(" owner %d/%d user %d/%d\n",
1501 inode
->i_uid
, inode
->i_gid
, current
->fsuid
, current
->fsgid
);
1504 if (acc
& (MAY_WRITE
| MAY_SATTR
| MAY_TRUNC
)) {
1505 if (EX_RDONLY(exp
) || IS_RDONLY(inode
))
1507 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode
))
1510 if ((acc
& MAY_TRUNC
) && IS_APPEND(inode
))
1513 if (acc
& MAY_LOCK
) {
1514 /* If we cannot rely on authentication in NLM requests,
1515 * just allow locks, otherwise require read permission, or
1518 if (exp
->ex_flags
& NFSEXP_NOAUTHNLM
)
1521 acc
= MAY_READ
| MAY_OWNER_OVERRIDE
;
1524 * The file owner always gets access permission for accesses that
1525 * would normally be checked at open time. This is to make
1526 * file access work even when the client has done a fchmod(fd, 0).
1528 * However, `cp foo bar' should fail nevertheless when bar is
1529 * readonly. A sensible way to do this might be to reject all
1530 * attempts to truncate a read-only file, because a creat() call
1531 * always implies file truncation.
1532 * ... but this isn't really fair. A process may reasonably call
1533 * ftruncate on an open file descriptor on a file with perm 000.
1534 * We must trust the client to do permission checking - using "ACCESS"
1537 if ((acc
& MAY_OWNER_OVERRIDE
) &&
1538 inode
->i_uid
== current
->fsuid
)
1542 err
= permission(inode
, acc
& (MAY_READ
|MAY_WRITE
|MAY_EXEC
));
1544 /* Allow read access to binaries even when mode 111 */
1545 if (err
== -EACCES
&& S_ISREG(inode
->i_mode
) && acc
== MAY_READ
)
1546 err
= permission(inode
, MAY_EXEC
);
1548 return err
? nfserrno(err
) : 0;
1552 nfsd_racache_shutdown(void)
1556 dprintk("nfsd: freeing readahead buffers.\n");
1558 raparm_cache
= raparml
= NULL
;
1561 * Initialize readahead param cache
1564 nfsd_racache_init(int cache_size
)
1570 raparml
= kmalloc(sizeof(struct raparms
) * cache_size
, GFP_KERNEL
);
1572 if (raparml
!= NULL
) {
1573 dprintk("nfsd: allocating %d readahead buffers.\n",
1575 memset(raparml
, 0, sizeof(struct raparms
) * cache_size
);
1576 for (i
= 0; i
< cache_size
- 1; i
++) {
1577 raparml
[i
].p_next
= raparml
+ i
+ 1;
1579 raparm_cache
= raparml
;
1582 "nfsd: Could not allocate memory read-ahead cache.\n");
1585 nfsdstats
.ra_size
= cache_size
;