/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $FreeBSD: src/sys/miscfs/union/union_subr.c,v 1.43.2.2 2001/12/25 01:44:45 dillon Exp $
 * $DragonFly: src/sys/vfs/union/union_subr.c,v 1.28 2007/05/06 19:23:35 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/module.h>
#include <sys/mount.h>

#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
#include <vm/vm_zone.h>
#include <vm/vm_object.h>	/* for vm cache coherency */

#include "union.h"

extern int	union_init (void);

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1))
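
/*
 * Illustrative note (not in the original source): the two vnode addresses
 * are summed and shifted right 8 bits because kernel allocations are
 * aligned, so the low-order address bits carry little entropy; the mask
 * then selects one of the NHASH buckets.  For example, the pointer pair
 * 0xc2e41200 and 0xc2e41400 hashes to ((0xc2e41200 + 0xc2e41400) >> 8) & 31,
 * which is 6, and a node with the same (upper, lower) pair always lands
 * in the same bucket.
 */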

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static void	union_dircache_r (struct vnode *vp, struct vnode ***vppp,
				      int *cntp);
static int	union_list_lock (int ix);
static void	union_list_unlock (int ix);
static int	union_relookup (struct union_mount *um, struct vnode *dvp,
				    struct vnode **vpp,
				    struct componentname *cnp,
				    struct componentname *cn, char *path,
				    int pathlen);
static void	union_updatevp (struct union_node *un,
				    struct vnode *uppervp,
				    struct vnode *lowervp);
static void	union_newlower (struct union_node *, struct vnode *);
static void	union_newupper (struct union_node *, struct vnode *);
static int	union_copyfile (struct vnode *, struct vnode *,
				    struct ucred *, struct thread *);
static int	union_vn_create (struct vnode **, struct union_node *,
				     struct thread *);
static int	union_vn_close (struct vnode *, int, struct ucred *);

int
union_init(void)
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t)unvplock, sizeof(unvplock));
	return (0);
}

static int
union_list_lock(int ix)
{
	if (unvplock[ix] & UNVP_LOCKED) {
		unvplock[ix] |= UNVP_WANT;
		(void) tsleep((caddr_t) &unvplock[ix], 0, "unllck", 0);
		return (1);
	}
	unvplock[ix] |= UNVP_LOCKED;
	return (0);
}

static void
union_list_unlock(int ix)
{
	unvplock[ix] &= ~UNVP_LOCKED;

	if (unvplock[ix] & UNVP_WANT) {
		unvplock[ix] &= ~UNVP_WANT;
		wakeup((caddr_t) &unvplock[ix]);
	}
}
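
/*
 * Usage note: a non-zero return from union_list_lock() means the caller
 * slept, so the list may have changed and the attempt must be retried:
 *
 *	while (union_list_lock(hash))
 *		continue;
 *	... examine or modify unhead[hash] ...
 *	union_list_unlock(hash);
 *
 * This retry idiom appears throughout this file.
 */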

/*
 *	union_updatevp:
 *
 *	The uppervp, if not NULL, must be referenced and not locked by us
 *	The lowervp, if not NULL, must be referenced.
 *
 *	If uppervp and lowervp match pointers already installed, nothing
 *	happens.  The passed vp's (when matching) are not adjusted.  This
 *	routine may only be called by union_newupper() and union_newlower().
 */
static void
union_updatevp(struct union_node *un, struct vnode *uppervp,
	       struct vnode *lowervp)
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash) {
		while (union_list_lock(lhash))
			continue;
	}

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				kfree(un->un_path, M_TEMP);
				un->un_path = 0;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);
		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}
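
/*
 * Illustration (not in the original source): if a node moves from old
 * bucket 5 to new bucket 2, the code above locks bucket 2 first and then
 * bucket 5; a thread moving a node from bucket 2 to bucket 5 likewise
 * locks 2 before 5, so no two threads can each hold one of the buckets
 * while waiting for the other.
 */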

/*
 * Set a new lowervp.  The passed lowervp must be referenced and will be
 * stored in the vp in a referenced state.
 */
static void
union_newlower(struct union_node *un, struct vnode *lowervp)
{
	union_updatevp(un, un->un_uppervp, lowervp);
}

/*
 * Set a new uppervp.  The passed uppervp must be locked and will be
 * stored in the vp in a locked state.  The caller should not unlock
 * uppervp.
 */
static void
union_newupper(struct union_node *un, struct vnode *uppervp)
{
	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(struct vnode *vp, off_t uppersz, off_t lowersz)
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
		UDEBUG(("union: %s size now %ld\n",
			(uppersz != VNOVAL ? "upper" : "lower"), (long)sz));
		vnode_pager_setsize(vp, sz);
	}
}

/*
 *	union_allocvp:	allocate a union_node and associate it with a
 *			parent union_node and one or two vnodes.
 *
 *	vpp	Holds the returned vnode locked and referenced if no
 *		error occurs.
 *
 *	mp	Holds the mount point.  mp may or may not be busied.
 *		allocvp() makes no changes to mp.
 *
 *	dvp	Holds the parent union_node to the one we wish to create.
 *		XXX may only be used to traverse an uncopied lowervp-based
 *		tree?  XXX
 *
 *		dvp may or may not be locked.  allocvp() makes no changes
 *		to dvp.
 *
 *	upperdvp Holds the parent vnode to uppervp, generally used along
 *		with path component information to create a shadow of
 *		lowervp when uppervp does not exist.
 *
 *		upperdvp is referenced but unlocked on entry, and will be
 *		dereferenced on return.
 *
 *	uppervp	Holds the new uppervp vnode to be stored in the
 *		union_node we are allocating.  uppervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	lowervp	Holds the new lowervp vnode to be stored in the
 *		union_node we are allocating.  lowervp is referenced but
 *		not locked, and will be dereferenced on return.
 *
 *	cnp	Holds path component information to be coupled with
 *		lowervp and upperdvp to allow unionfs to create an uppervp
 *		later on.  Only used if lowervp is valid.  The contents
 *		of cnp is only valid for the duration of the call.
 *
 *	docache	Determine whether this node should be entered in the
 *		cache or whether it should be destroyed as soon as possible.
 *
 * All union_nodes are maintained on a singly-linked
 * list.  New nodes are only allocated when they cannot
 * be found on this list.  Entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * A single lock is kept for the entire list.  This is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  This lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * If an entry is found on the list, then call vget() to
 * take a reference.  This is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(struct vnode **vpp,
	      struct mount *mp,
	      struct vnode *dvp,		/* parent union vnode */
	      struct vnode *upperdvp,		/* parent vnode of uppervp */
	      struct componentname *cnp,	/* may be null */
	      struct vnode *uppervp,		/* may be null */
	      struct vnode *lowervp,		/* may be null */
	      int docache)
{
	int error;
	struct union_node *un = 0;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	struct thread *td = (cnp) ? cnp->cn_td : curthread; /* XXX */
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		vrele(lowervp);
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				vref(lowervp);
		}
		vflag = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
		     un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un),
					 LK_EXCLUSIVE | LK_SLEEPFAIL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}
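
	/*
	 * Note: the three passes above try progressively weaker matches:
	 * first a node hashed under the exact (upper, lower) pair, then
	 * one hashed under the upper vnode alone, then one hashed under
	 * the lower vnode alone, since a cached union_node may have been
	 * created before both layers were known.
	 */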

	if (un) {
		/*
		 * Obtain a lock on the union_node.  Everything is unlocked
		 * except for dvp, so check that case.  If they match, our
		 * new un is already locked.  Otherwise we have to lock our
		 * new un.
		 *
		 * A potential deadlock situation occurs when we are holding
		 * one lock while trying to get another.  We must follow
		 * strict ordering rules to avoid it.  We try to locate dvp
		 * by scanning up from un_vnode, since the most likely
		 * scenario is un being under dvp.
		 */
		if (dvp && un->un_vnode != dvp) {
			struct vnode *scan = un->un_vnode;

			do {
				scan = VTOUNION(scan)->un_pvp;
			} while (scan && scan->v_tag == VT_UNION &&
				 scan != dvp);
			if (scan != dvp) {
				/*
				 * our new un is above dvp (we never saw dvp
				 * while moving up the tree).
				 */
				vref(dvp);
				vn_unlock(dvp);
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE);
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
				vrele(dvp);
			} else {
				/*
				 * our new un is under dvp
				 */
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE);
			}
		} else if (dvp == NULLVP) {
			/*
			 * dvp is NULL, we need to lock un.
			 */
			error = vn_lock(un->un_vnode, LK_EXCLUSIVE);
		} else {
			/*
			 * dvp == un->un_vnode, we are already locked.
			 */
			error = 0;
		}

		if (error)
			goto loop;

		/*
		 * At this point, the union_node is locked and referenced.
		 *
		 * uppervp is locked and referenced or NULL, lowervp is
		 * referenced or NULL.
		 */
		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
			un, un->un_vnode, un->un_uppervp,
			(un->un_uppervp ? un->un_uppervp->v_sysref.refcnt : -99),
			uppervp,
			(uppervp ? uppervp->v_sysref.refcnt : -99)
		));

		if (uppervp != un->un_uppervp) {
			KASSERT(uppervp == NULL || uppervp->v_sysref.refcnt > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", uppervp->v_sysref.refcnt));
			union_newupper(un, uppervp);
		} else if (uppervp) {
			KASSERT(uppervp->v_sysref.refcnt > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", uppervp->v_sysref.refcnt));
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create()
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_path = kmalloc(cnp->cn_namelen + 1,
						      M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
				      cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
			}
		} else if (lowervp) {
			vrele(lowervp);
		}

		/*
		 * and upperdvp
		 */
		if (upperdvp != un->un_dirvp) {
			if (un->un_dirvp)
				vrele(un->un_dirvp);
			un->un_dirvp = upperdvp;
		} else if (upperdvp) {
			vrele(upperdvp);
		}

		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	/*
	 * Create new node rather than replace old node.
	 */
	error = getnewvnode(VT_UNION, mp, vpp, 0, 0);
	if (error) {
		/*
		 * If an error occurs clear out vnodes.
		 */
		if (lowervp)
			vrele(lowervp);
		if (uppervp)
			vrele(uppervp);
		if (upperdvp)
			vrele(upperdvp);
		*vpp = NULL;
		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
	       M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;

	un = VTOUNION(*vpp);
	bzero(un, sizeof(*un));

	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_dirvp = upperdvp;
	un->un_pvp = dvp;		/* only parent dir in new allocation */
	if (dvp != NULLVP)
		vref(dvp);

	if (cnp && (lowervp != NULLVP)) {
		un->un_path = kmalloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
	} else {
		un->un_path = NULL;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	/*
	 * locked refd vpp is returned
	 */

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

int
union_freevp(struct vnode *vp)
{
	struct union_node *un = VTOUNION(vp);

	vp->v_data = NULL;
	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
	if (un->un_pvp != NULLVP) {
		vrele(un->un_pvp);
		un->un_pvp = NULL;
	}
	if (un->un_uppervp != NULLVP) {
		vrele(un->un_uppervp);
		un->un_uppervp = NULL;
	}
	if (un->un_lowervp != NULLVP) {
		vrele(un->un_lowervp);
		un->un_lowervp = NULL;
	}
	if (un->un_dirvp != NULLVP) {
		vrele(un->un_dirvp);
		un->un_dirvp = NULL;
	}
	if (un->un_path) {
		kfree(un->un_path, M_TEMP);
		un->un_path = NULL;
	}
	kfree(un, M_TEMP);

	return (0);
}

/*
 * copyfile.  Copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  Both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * fvp and tvp are both exclusive locked on call, but their refcounts
 * haven't been bumped at all.
 */
static int
union_copyfile(struct vnode *fvp, struct vnode *tvp, struct ucred *cred,
	       struct thread *td)
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	bzero(&uio, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	buf = kmalloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;
		int count;
		int bufoffset;

		/*
		 * Setup for big read.
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
			break;

		/*
		 * Get bytes read, handle read eof case and setup for
		 * write loop.
		 */
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;
		bufoffset = 0;

		/*
		 * Write until an error occurs or our buffer has been
		 * exhausted, then update the offset for the next read.
		 */
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
				break;
			bufoffset += (count - bufoffset) - uio.uio_resid;
		}
		uio.uio_offset = offset + bufoffset;
	} while (error == 0);

	kfree(buf, M_TEMP);
	return (error);
}
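
/*
 * Note on the write loop above: VOP_WRITE may consume only part of a
 * request, so the residual count is used to advance bufoffset and the
 * write is re-issued until the buffer drains or an error occurs;
 * uio_offset is then resynchronized before the next MAXBSIZE read.
 */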

/*
 * un's vnode is assumed to be locked on entry and remains locked on exit.
 */
int
union_copyup(struct union_node *un, int docopy, struct ucred *cred,
	     struct thread *td)
{
	int error;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred);
	vn_unlock(un->un_lowervp);
	if (error)
		return (error);

	error = union_vn_create(&uvp, un, td);
	if (error)
		return (error);

	lvp = un->un_lowervp;

	KASSERT(uvp->v_sysref.refcnt > 0, ("copy: uvp refcount 0: %d", uvp->v_sysref.refcnt));
	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from VOP_CLOSE()
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(lvp, FREAD, cred, NULL);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, td);
			vn_unlock(lvp);
			(void) VOP_CLOSE(lvp, FREAD);
		}
		if (error == 0)
			UDEBUG(("union: copied up %s\n", un->un_path));
	}
	union_newupper(un, uvp);
	KASSERT(uvp->v_sysref.refcnt > 0, ("copy: uvp refcount 0: %d", uvp->v_sysref.refcnt));
	union_vn_close(uvp, FWRITE, cred);
	KASSERT(uvp->v_sysref.refcnt > 0, ("copy: uvp refcount 0: %d", uvp->v_sysref.refcnt));

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0 && un->un_openl > 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			VOP_CLOSE(lvp, FREAD);
			VOP_OPEN(uvp, FREAD, cred, NULL);
		}
		un->un_openl = 0;
	}

	return (error);
}
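
/*
 * un_openl counts opens that were satisfied by the lower vnode; after a
 * copyup the loop above transfers those opens to the upper vnode one by
 * one so that each underlying filesystem's open/close accounting stays
 * balanced.
 */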

/*
 * dvp should be locked on entry and will be locked on return.  No
 * net change in the ref count will occur.
 *
 * If an error is returned, *vpp will be invalid, otherwise it
 * will hold a locked, referenced vnode.  If *vpp == dvp then
 * remember that only one exclusive lock is held.
 */
static int
union_relookup(struct union_mount *um, struct vnode *dvp, struct vnode **vpp,
	       struct componentname *cnp, struct componentname *cn, char *path,
	       int pathlen)
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_nameptr = objcache_get(namei_oc, M_WAITOK);
	bcopy(path, cn->cn_nameptr, cn->cn_namelen);
	cn->cn_nameptr[cn->cn_namelen] = '\0';

	cn->cn_nameiop = NAMEI_CREATE;
	cn->cn_flags = CNP_LOCKPARENT;
	cn->cn_td = cnp->cn_td;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_consume = cnp->cn_consume;

	vref(dvp);
	vn_unlock(dvp);

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and
	 * dereferenced.
	 */
	if ((error = relookup(dvp, vpp, cn)) != 0) {
		objcache_put(namei_oc, cn->cn_nameptr);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}
	objcache_put(namei_oc, cn->cn_nameptr);

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 *
	 * We want to return with dvp as it was passed to us, so we get
	 * rid of our reference.
	 */
	vrele(dvp);
	return (0);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory,
 * it is locked (but not ref'd) on entry and return.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked and ref'd
 */
int
union_mkshadow(struct union_mount *um, struct vnode *dvp,
	       struct componentname *cnp, struct vnode **vpp)
{
	int error;
	struct vattr va;
	struct thread *td = cnp->cn_td;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			       cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		if (*vpp == dvp)
			vrele(*vpp);
		else
			vput(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */
	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and return.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(struct union_mount *um, struct vnode *dvp,
		 struct componentname *cnp, char *path)
{
	int error;
	struct thread *td = cnp->cn_td;
	struct vnode *wvp;
	struct componentname cn;
	struct ucred *cred;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;

	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error)
		return (error);

	if (wvp) {
		if (wvp == dvp)
			vrele(wvp);
		else
			vput(wvp);
		return (EEXIST);
	}

	error = VOP_WHITEOUT(dvp, &cn, NAMEI_CREATE);
	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  This function is similar
 * in spirit to calling vn_open() but it avoids calling namei().
 * The problem with calling namei() is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup() is told where to start.
 *
 * On entry, the vnode associated with un is locked.  It remains locked
 * on return.
 *
 * If no error occurs, *vpp contains a locked referenced vnode for your
 * use.  If an error occurs *vpp is undefined.
 */
static int
union_vn_create(struct vnode **vpp, struct union_node *un, struct thread *td)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode;
	struct componentname cn;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;
	cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow()).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_nameptr = objcache_get(namei_oc, M_WAITOK);
	bcopy(un->un_path, cn.cn_nameptr, cn.cn_namelen+1);
	cn.cn_nameiop = NAMEI_CREATE;
	cn.cn_flags = CNP_LOCKPARENT;
	cn.cn_td = td;
	cn.cn_cred = cred;
	cn.cn_consume = 0;

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and
	 * dereferenced.
	 */
	vref(un->un_dirvp);
	error = relookup(un->un_dirvp, &vp, &cn);
	objcache_put(namei_oc, cn.cn_nameptr);
	if (error)
		return (error);

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 */
	if (vp) {
		vput(un->un_dirvp);
		if (vp == un->un_dirvp)
			vrele(vp);
		else
			vput(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 *
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
	vput(un->un_dirvp);
	if (error)
		return (error);

	error = VOP_OPEN(vp, fmode, cred, NULL);
	if (error) {
		vput(vp);
		return (error);
	}
	*vpp = vp;
	return (0);
}

static int
union_vn_close(struct vnode *vp, int fmode, struct ucred *cred)
{
	return (VOP_CLOSE(vp, fmode));
}

/*
 *	union_removed_upper:
 *
 *	called with union_node unlocked. XXX
 */
void
union_removed_upper(struct union_node *un)
{
	struct thread *td = curthread;	/* XXX */
	struct vnode **vpp;

	/*
	 * Do not set the uppervp to NULLVP.  If lowervp is NULLVP,
	 * union node will have neither uppervp nor lowervp.  We remove
	 * the union node from cache, so that it will not be referenced.
	 */
	union_newupper(un, NULLVP);
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		kfree(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
}

/*
 * Determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(struct union_node *un, struct ucred *cred, struct thread *td)
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);
	return (0);
}

static void
union_dircache_r(struct vnode *vp, struct vnode ***vppp, int *cntp)
{
	struct union_node *un;

	if (vp->v_tag != VT_UNION) {
		if (vppp) {
			vref(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}
		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

struct vnode *
union_dircache(struct vnode *vp, struct thread *td)
{
	int cnt;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == NULL) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = kmalloc(cnt * sizeof(struct vnode *),
				   M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	/*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);*/
	UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_sysref.refcnt : -99)));
	vref(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0);
	UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_sysref.refcnt : -99)));
	if (error) {
		vrele(*vpp);
		goto out;
	}

	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	vn_unlock(vp);
	return (nvp);
}
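
/*
 * The dircache built above is a NULL-terminated array of referenced
 * vnodes, upper layers first (union_dircache_r() pushes un_uppervp
 * before un_lowervp), so a union-aware readdir can fall from one layer
 * to the next simply by walking the array.
 */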

/*
 * Guarantee coherency with the VM cache by invalidating any clean VM pages
 * associated with this write and updating any dirty VM pages.  Since our
 * vnode is locked, other processes will not be able to read the pages in
 * again until after our write completes.
 *
 * We also have to be coherent with reads, by flushing any pending dirty
 * pages prior to issuing the read.
 *
 * XXX this is somewhat of a hack at the moment.  To support this properly
 * we would have to be able to run VOP_READ and VOP_WRITE through the VM
 * cache.  Then we wouldn't need to worry about coherency.
 */
void
union_vm_coherency(struct vnode *vp, struct uio *uio, int cleanfls)
{
	vm_object_t object;
	vm_pindex_t pstart;
	vm_pindex_t pend;
	int pgoff;

	if ((object = vp->v_object) == NULL)
		return;

	pgoff = uio->uio_offset & PAGE_MASK;
	pstart = uio->uio_offset / PAGE_SIZE;
	pend = pstart + (uio->uio_resid + pgoff + PAGE_MASK) / PAGE_SIZE;

	vm_object_page_clean(object, pstart, pend, OBJPC_SYNC);
	if (cleanfls)
		vm_object_page_remove(object, pstart, pend, TRUE);
}
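
/*
 * Worked example (illustrative): with PAGE_SIZE 4096, a transfer at
 * uio_offset 1000 with uio_resid 5000 gives pgoff = 1000, pstart = 0,
 * and pend = 0 + (5000 + 1000 + 4095) / 4096 = 2, i.e. pages 0 and 1,
 * exactly the pages the transfer touches.
 */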

/*
 * Module glue to remove #ifdef UNION from vfs_syscalls.c
 */
static int
union_dircheck(struct thread *td, struct vnode **vp, struct file *fp)
{
	int error = 0;

	if ((*vp)->v_tag == VT_UNION) {
		struct vnode *lvp;

		lvp = union_dircache(*vp, td);
		if (lvp != NULLVP) {
			struct vattr va;

			/*
			 * If the directory is opaque,
			 * then don't show lower entries
			 */
			error = VOP_GETATTR(*vp, &va);
			if (va.va_flags & OPAQUE) {
				vput(lvp);
				lvp = NULLVP;
			}
		}

		if (lvp != NULLVP) {
			error = VOP_OPEN(lvp, FREAD, fp->f_cred, NULL);
			if (error) {
				vput(lvp);
				return (error);
			}
			vn_unlock(lvp);
			fp->f_data = lvp;
			fp->f_offset = 0;
			error = vn_close(*vp, FREAD);
			if (error)
				return (error);
			*vp = lvp;
			return -1;	/* goto unionread */
		}
	}
	return error;
}

static int
union_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		union_dircheckp = union_dircheck;
		break;
	case MOD_UNLOAD:
		union_dircheckp = NULL;
		break;
	default:
		break;
	}
	return 0;
}

static moduledata_t union_mod = {
	"union_dircheck",
	union_modevent,
	NULL
};

DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);