2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.63 2008/06/10 05:06:20 dillon Exp $
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
46 #include <sys/dirent.h>
47 #include <vm/vm_extern.h>
48 #include <vfs/fifofs/fifo.h>
54 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
55 static int hammer_vop_fsync(struct vop_fsync_args
*);
56 static int hammer_vop_read(struct vop_read_args
*);
57 static int hammer_vop_write(struct vop_write_args
*);
58 static int hammer_vop_access(struct vop_access_args
*);
59 static int hammer_vop_advlock(struct vop_advlock_args
*);
60 static int hammer_vop_close(struct vop_close_args
*);
61 static int hammer_vop_ncreate(struct vop_ncreate_args
*);
62 static int hammer_vop_getattr(struct vop_getattr_args
*);
63 static int hammer_vop_nresolve(struct vop_nresolve_args
*);
64 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args
*);
65 static int hammer_vop_nlink(struct vop_nlink_args
*);
66 static int hammer_vop_nmkdir(struct vop_nmkdir_args
*);
67 static int hammer_vop_nmknod(struct vop_nmknod_args
*);
68 static int hammer_vop_open(struct vop_open_args
*);
69 static int hammer_vop_pathconf(struct vop_pathconf_args
*);
70 static int hammer_vop_print(struct vop_print_args
*);
71 static int hammer_vop_readdir(struct vop_readdir_args
*);
72 static int hammer_vop_readlink(struct vop_readlink_args
*);
73 static int hammer_vop_nremove(struct vop_nremove_args
*);
74 static int hammer_vop_nrename(struct vop_nrename_args
*);
75 static int hammer_vop_nrmdir(struct vop_nrmdir_args
*);
76 static int hammer_vop_setattr(struct vop_setattr_args
*);
77 static int hammer_vop_strategy(struct vop_strategy_args
*);
78 static int hammer_vop_nsymlink(struct vop_nsymlink_args
*);
79 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args
*);
80 static int hammer_vop_ioctl(struct vop_ioctl_args
*);
81 static int hammer_vop_mountctl(struct vop_mountctl_args
*);
83 static int hammer_vop_fifoclose (struct vop_close_args
*);
84 static int hammer_vop_fiforead (struct vop_read_args
*);
85 static int hammer_vop_fifowrite (struct vop_write_args
*);
87 static int hammer_vop_specclose (struct vop_close_args
*);
88 static int hammer_vop_specread (struct vop_read_args
*);
89 static int hammer_vop_specwrite (struct vop_write_args
*);
91 struct vop_ops hammer_vnode_vops
= {
92 .vop_default
= vop_defaultop
,
93 .vop_fsync
= hammer_vop_fsync
,
94 .vop_getpages
= vop_stdgetpages
,
95 .vop_putpages
= vop_stdputpages
,
96 .vop_read
= hammer_vop_read
,
97 .vop_write
= hammer_vop_write
,
98 .vop_access
= hammer_vop_access
,
99 .vop_advlock
= hammer_vop_advlock
,
100 .vop_close
= hammer_vop_close
,
101 .vop_ncreate
= hammer_vop_ncreate
,
102 .vop_getattr
= hammer_vop_getattr
,
103 .vop_inactive
= hammer_vop_inactive
,
104 .vop_reclaim
= hammer_vop_reclaim
,
105 .vop_nresolve
= hammer_vop_nresolve
,
106 .vop_nlookupdotdot
= hammer_vop_nlookupdotdot
,
107 .vop_nlink
= hammer_vop_nlink
,
108 .vop_nmkdir
= hammer_vop_nmkdir
,
109 .vop_nmknod
= hammer_vop_nmknod
,
110 .vop_open
= hammer_vop_open
,
111 .vop_pathconf
= hammer_vop_pathconf
,
112 .vop_print
= hammer_vop_print
,
113 .vop_readdir
= hammer_vop_readdir
,
114 .vop_readlink
= hammer_vop_readlink
,
115 .vop_nremove
= hammer_vop_nremove
,
116 .vop_nrename
= hammer_vop_nrename
,
117 .vop_nrmdir
= hammer_vop_nrmdir
,
118 .vop_setattr
= hammer_vop_setattr
,
119 .vop_strategy
= hammer_vop_strategy
,
120 .vop_nsymlink
= hammer_vop_nsymlink
,
121 .vop_nwhiteout
= hammer_vop_nwhiteout
,
122 .vop_ioctl
= hammer_vop_ioctl
,
123 .vop_mountctl
= hammer_vop_mountctl
126 struct vop_ops hammer_spec_vops
= {
127 .vop_default
= spec_vnoperate
,
128 .vop_fsync
= hammer_vop_fsync
,
129 .vop_read
= hammer_vop_specread
,
130 .vop_write
= hammer_vop_specwrite
,
131 .vop_access
= hammer_vop_access
,
132 .vop_close
= hammer_vop_specclose
,
133 .vop_getattr
= hammer_vop_getattr
,
134 .vop_inactive
= hammer_vop_inactive
,
135 .vop_reclaim
= hammer_vop_reclaim
,
136 .vop_setattr
= hammer_vop_setattr
139 struct vop_ops hammer_fifo_vops
= {
140 .vop_default
= fifo_vnoperate
,
141 .vop_fsync
= hammer_vop_fsync
,
142 .vop_read
= hammer_vop_fiforead
,
143 .vop_write
= hammer_vop_fifowrite
,
144 .vop_access
= hammer_vop_access
,
145 .vop_close
= hammer_vop_fifoclose
,
146 .vop_getattr
= hammer_vop_getattr
,
147 .vop_inactive
= hammer_vop_inactive
,
148 .vop_reclaim
= hammer_vop_reclaim
,
149 .vop_setattr
= hammer_vop_setattr
152 #ifdef DEBUG_TRUNCATE
153 struct hammer_inode
*HammerTruncIp
;
156 static int hammer_dounlink(hammer_transaction_t trans
, struct nchandle
*nch
,
157 struct vnode
*dvp
, struct ucred
*cred
, int flags
);
158 static int hammer_vop_strategy_read(struct vop_strategy_args
*ap
);
159 static int hammer_vop_strategy_write(struct vop_strategy_args
*ap
);
160 static void hammer_cleanup_write_io(hammer_inode_t ip
);
161 static void hammer_update_rsv_databufs(hammer_inode_t ip
);
166 hammer_vop_vnoperate(struct vop_generic_args
*)
168 return (VOCALL(&hammer_vnode_vops
, ap
));
173 * hammer_vop_fsync { vp, waitfor }
177 hammer_vop_fsync(struct vop_fsync_args
*ap
)
179 hammer_inode_t ip
= VTOI(ap
->a_vp
);
181 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
182 vfsync(ap
->a_vp
, ap
->a_waitfor
, 1, NULL
, NULL
);
183 if (ap
->a_waitfor
== MNT_WAIT
)
184 hammer_wait_inode(ip
);
189 * hammer_vop_read { vp, uio, ioflag, cred }
193 hammer_vop_read(struct vop_read_args
*ap
)
195 struct hammer_transaction trans
;
204 if (ap
->a_vp
->v_type
!= VREG
)
208 seqcount
= ap
->a_ioflag
>> 16;
210 hammer_start_transaction(&trans
, ip
->hmp
);
213 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
216 while (uio
->uio_resid
> 0 && uio
->uio_offset
< ip
->ino_data
.size
) {
217 offset
= uio
->uio_offset
& HAMMER_BUFMASK
;
219 error
= cluster_read(ap
->a_vp
, ip
->ino_data
.size
,
220 uio
->uio_offset
- offset
, HAMMER_BUFSIZE
,
221 MAXBSIZE
, seqcount
, &bp
);
223 error
= bread(ap
->a_vp
, uio
->uio_offset
- offset
,
224 HAMMER_BUFSIZE
, &bp
);
229 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
230 n
= HAMMER_BUFSIZE
- offset
;
231 if (n
> uio
->uio_resid
)
233 if (n
> ip
->ino_data
.size
- uio
->uio_offset
)
234 n
= (int)(ip
->ino_data
.size
- uio
->uio_offset
);
235 error
= uiomove((char *)bp
->b_data
+ offset
, n
, uio
);
242 if ((ip
->flags
& HAMMER_INODE_RO
) == 0 &&
243 (ip
->hmp
->mp
->mnt_flag
& MNT_NOATIME
) == 0) {
244 ip
->ino_leaf
.atime
= trans
.time
;
245 hammer_modify_inode(ip
, HAMMER_INODE_ITIMES
);
247 hammer_done_transaction(&trans
);
252 * hammer_vop_write { vp, uio, ioflag, cred }
256 hammer_vop_write(struct vop_write_args
*ap
)
258 struct hammer_transaction trans
;
259 struct hammer_inode
*ip
;
269 if (ap
->a_vp
->v_type
!= VREG
)
274 if (ip
->flags
& HAMMER_INODE_RO
)
278 * Create a transaction to cover the operations we perform.
280 hammer_start_transaction(&trans
, ip
->hmp
);
286 if (ap
->a_ioflag
& IO_APPEND
)
287 uio
->uio_offset
= ip
->ino_data
.size
;
290 * Check for illegal write offsets. Valid range is 0...2^63-1
292 if (uio
->uio_offset
< 0 || uio
->uio_offset
+ uio
->uio_resid
<= 0) {
293 hammer_done_transaction(&trans
);
298 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
301 while (uio
->uio_resid
> 0) {
304 if ((error
= hammer_checkspace(trans
.hmp
)) != 0)
308 * Do not allow HAMMER to blow out the buffer cache.
310 * Do not allow HAMMER to blow out system memory by
311 * accumulating too many records. Records are decoupled
312 * from the buffer cache.
314 * Always check at the beginning so separate writes are
315 * not able to bypass this code.
317 * WARNING: Cannot unlock vp when doing a NOCOPY write as
318 * part of a putpages operation. Doing so could cause us
319 * to deadlock against the VM system when we try to re-lock.
321 if ((count
++ & 15) == 0) {
322 if (uio
->uio_segflg
!= UIO_NOCOPY
) {
324 if ((ap
->a_ioflag
& IO_NOBWILL
) == 0)
327 if (ip
->rsv_recs
> hammer_limit_irecs
) {
328 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
329 hammer_wait_inode(ip
);
331 if (uio
->uio_segflg
!= UIO_NOCOPY
)
332 vn_lock(ap
->a_vp
, LK_EXCLUSIVE
|LK_RETRY
);
335 rel_offset
= (int)(uio
->uio_offset
& HAMMER_BUFMASK
);
336 base_offset
= uio
->uio_offset
& ~HAMMER_BUFMASK64
;
337 n
= HAMMER_BUFSIZE
- rel_offset
;
338 if (n
> uio
->uio_resid
)
340 if (uio
->uio_offset
+ n
> ip
->ino_data
.size
) {
341 vnode_pager_setsize(ap
->a_vp
, uio
->uio_offset
+ n
);
345 if (uio
->uio_segflg
== UIO_NOCOPY
) {
347 * Issuing a write with the same data backing the
348 * buffer. Instantiate the buffer to collect the
349 * backing vm pages, then read-in any missing bits.
351 * This case is used by vop_stdputpages().
353 bp
= getblk(ap
->a_vp
, base_offset
,
354 HAMMER_BUFSIZE
, GETBLK_BHEAVY
, 0);
355 if ((bp
->b_flags
& B_CACHE
) == 0) {
357 error
= bread(ap
->a_vp
, base_offset
,
358 HAMMER_BUFSIZE
, &bp
);
360 } else if (rel_offset
== 0 && uio
->uio_resid
>= HAMMER_BUFSIZE
) {
362 * Even though we are entirely overwriting the buffer
363 * we may still have to zero it out to avoid a
364 * mmap/write visibility issue.
366 bp
= getblk(ap
->a_vp
, base_offset
,
367 HAMMER_BUFSIZE
, GETBLK_BHEAVY
, 0);
368 if ((bp
->b_flags
& B_CACHE
) == 0)
370 } else if (base_offset
>= ip
->ino_data
.size
) {
372 * If the base offset of the buffer is beyond the
373 * file EOF, we don't have to issue a read.
375 bp
= getblk(ap
->a_vp
, base_offset
,
376 HAMMER_BUFSIZE
, GETBLK_BHEAVY
, 0);
380 * Partial overwrite, read in any missing bits then
381 * replace the portion being written.
383 error
= bread(ap
->a_vp
, base_offset
,
384 HAMMER_BUFSIZE
, &bp
);
389 error
= uiomove((char *)bp
->b_data
+ rel_offset
,
394 * If we screwed up we have to undo any VM size changes we
400 vtruncbuf(ap
->a_vp
, ip
->ino_data
.size
,
405 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
406 if (ip
->ino_data
.size
< uio
->uio_offset
) {
407 ip
->ino_data
.size
= uio
->uio_offset
;
408 flags
= HAMMER_INODE_DDIRTY
;
409 vnode_pager_setsize(ap
->a_vp
, ip
->ino_data
.size
);
413 ip
->ino_data
.mtime
= trans
.time
;
414 flags
|= HAMMER_INODE_ITIMES
| HAMMER_INODE_BUFS
;
415 flags
|= HAMMER_INODE_DDIRTY
; /* XXX mtime */
416 hammer_modify_inode(ip
, flags
);
419 * Try to keep track of cached dirty data.
421 if ((bp
->b_flags
& B_DIRTY
) == 0) {
423 ++ip
->hmp
->rsv_databufs
;
427 * Final buffer disposition.
429 if (ap
->a_ioflag
& IO_SYNC
) {
431 } else if (ap
->a_ioflag
& IO_DIRECT
) {
434 } else if ((ap
->a_ioflag
>> 16) == IO_SEQMAX
&&
435 (uio
->uio_offset
& HAMMER_BUFMASK
) == 0) {
437 * If seqcount indicates sequential operation and
438 * we just finished filling a buffer, push it out
439 * now to prevent the buffer cache from becoming
440 * too full, which would trigger non-optimal
449 hammer_done_transaction(&trans
);
454 * hammer_vop_access { vp, mode, cred }
458 hammer_vop_access(struct vop_access_args
*ap
)
460 struct hammer_inode
*ip
= VTOI(ap
->a_vp
);
465 uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
466 gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
468 error
= vop_helper_access(ap
, uid
, gid
, ip
->ino_data
.mode
,
469 ip
->ino_data
.uflags
);
474 * hammer_vop_advlock { vp, id, op, fl, flags }
478 hammer_vop_advlock(struct vop_advlock_args
*ap
)
480 struct hammer_inode
*ip
= VTOI(ap
->a_vp
);
482 return (lf_advlock(ap
, &ip
->advlock
, ip
->ino_data
.size
));
486 * hammer_vop_close { vp, fflag }
490 hammer_vop_close(struct vop_close_args
*ap
)
492 return (vop_stdclose(ap
));
496 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
498 * The operating system has already ensured that the directory entry
499 * does not exist and done all appropriate namespace locking.
503 hammer_vop_ncreate(struct vop_ncreate_args
*ap
)
505 struct hammer_transaction trans
;
506 struct hammer_inode
*dip
;
507 struct hammer_inode
*nip
;
508 struct nchandle
*nch
;
512 dip
= VTOI(ap
->a_dvp
);
514 if (dip
->flags
& HAMMER_INODE_RO
)
516 if ((error
= hammer_checkspace(dip
->hmp
)) != 0)
520 * Create a transaction to cover the operations we perform.
522 hammer_start_transaction(&trans
, dip
->hmp
);
525 * Create a new filesystem object of the requested type. The
526 * returned inode will be referenced and shared-locked to prevent
527 * it from being moved to the flusher.
530 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
, dip
, &nip
);
532 hkprintf("hammer_create_inode error %d\n", error
);
533 hammer_done_transaction(&trans
);
539 * Add the new filesystem object to the directory. This will also
540 * bump the inode's link count.
542 error
= hammer_ip_add_directory(&trans
, dip
, nch
->ncp
, nip
);
544 hkprintf("hammer_ip_add_directory error %d\n", error
);
550 hammer_rel_inode(nip
, 0);
551 hammer_done_transaction(&trans
);
554 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
555 hammer_done_transaction(&trans
);
556 hammer_rel_inode(nip
, 0);
558 cache_setunresolved(ap
->a_nch
);
559 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
566 * hammer_vop_getattr { vp, vap }
568 * Retrieve an inode's attribute information. When accessing inodes
569 * historically we fake the atime field to ensure consistent results.
570 * The atime field is stored in the B-Tree element and allowed to be
571 * updated without cycling the element.
575 hammer_vop_getattr(struct vop_getattr_args
*ap
)
577 struct hammer_inode
*ip
= VTOI(ap
->a_vp
);
578 struct vattr
*vap
= ap
->a_vap
;
581 if (cache_check_fsmid_vp(ap
->a_vp
, &ip
->fsmid
) &&
582 (vp
->v_mount
->mnt_flag
& MNT_RDONLY
) == 0 &&
587 hammer_itimes(ap
->a_vp
);
590 vap
->va_fsid
= ip
->hmp
->fsid_udev
;
591 vap
->va_fileid
= ip
->ino_leaf
.base
.obj_id
;
592 vap
->va_mode
= ip
->ino_data
.mode
;
593 vap
->va_nlink
= ip
->ino_data
.nlinks
;
594 vap
->va_uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
595 vap
->va_gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
598 vap
->va_size
= ip
->ino_data
.size
;
599 if (ip
->flags
& HAMMER_INODE_RO
)
600 hammer_to_timespec(ip
->ino_data
.mtime
, &vap
->va_atime
);
602 hammer_to_timespec(ip
->ino_leaf
.atime
, &vap
->va_atime
);
603 hammer_to_timespec(ip
->ino_data
.mtime
, &vap
->va_mtime
);
604 hammer_to_timespec(ip
->ino_data
.ctime
, &vap
->va_ctime
);
605 vap
->va_flags
= ip
->ino_data
.uflags
;
606 vap
->va_gen
= 1; /* hammer inums are unique for all time */
607 vap
->va_blocksize
= HAMMER_BUFSIZE
;
608 vap
->va_bytes
= (ip
->ino_data
.size
+ 63) & ~63;
609 vap
->va_type
= hammer_get_vnode_type(ip
->ino_data
.obj_type
);
610 vap
->va_filerev
= 0; /* XXX */
611 /* mtime uniquely identifies any adjustments made to the file */
612 vap
->va_fsmid
= ip
->ino_data
.mtime
;
613 vap
->va_uid_uuid
= ip
->ino_data
.uid
;
614 vap
->va_gid_uuid
= ip
->ino_data
.gid
;
615 vap
->va_fsid_uuid
= ip
->hmp
->fsid
;
616 vap
->va_vaflags
= VA_UID_UUID_VALID
| VA_GID_UUID_VALID
|
619 switch (ip
->ino_data
.obj_type
) {
620 case HAMMER_OBJTYPE_CDEV
:
621 case HAMMER_OBJTYPE_BDEV
:
622 vap
->va_rmajor
= ip
->ino_data
.rmajor
;
623 vap
->va_rminor
= ip
->ino_data
.rminor
;
633 * hammer_vop_nresolve { nch, dvp, cred }
635 * Locate the requested directory entry.
639 hammer_vop_nresolve(struct vop_nresolve_args
*ap
)
641 struct hammer_transaction trans
;
642 struct namecache
*ncp
;
646 struct hammer_cursor cursor
;
656 * Misc initialization, plus handle as-of name extensions. Look for
657 * the '@@' extension. Note that as-of files and directories cannot
660 dip
= VTOI(ap
->a_dvp
);
661 ncp
= ap
->a_nch
->ncp
;
662 asof
= dip
->obj_asof
;
666 hammer_simple_transaction(&trans
, dip
->hmp
);
668 for (i
= 0; i
< nlen
; ++i
) {
669 if (ncp
->nc_name
[i
] == '@' && ncp
->nc_name
[i
+1] == '@') {
670 asof
= hammer_str_to_tid(ncp
->nc_name
+ i
+ 2);
671 flags
|= HAMMER_INODE_RO
;
678 * If there is no path component the time extension is relative to
682 ip
= hammer_get_inode(&trans
, &dip
->cache
[1], dip
->obj_id
,
683 asof
, flags
, &error
);
685 error
= hammer_get_vnode(ip
, &vp
);
686 hammer_rel_inode(ip
, 0);
692 cache_setvp(ap
->a_nch
, vp
);
699 * Calculate the namekey and setup the key range for the scan. This
700 * works kinda like a chained hash table where the lower 32 bits
701 * of the namekey synthesize the chain.
703 * The key range is inclusive of both key_beg and key_end.
705 namekey
= hammer_directory_namekey(ncp
->nc_name
, nlen
);
707 error
= hammer_init_cursor(&trans
, &cursor
, &dip
->cache
[0], dip
);
708 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_MISC
;
709 cursor
.key_beg
.obj_id
= dip
->obj_id
;
710 cursor
.key_beg
.key
= namekey
;
711 cursor
.key_beg
.create_tid
= 0;
712 cursor
.key_beg
.delete_tid
= 0;
713 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
714 cursor
.key_beg
.obj_type
= 0;
716 cursor
.key_end
= cursor
.key_beg
;
717 cursor
.key_end
.key
|= 0xFFFFFFFFULL
;
719 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
722 * Scan all matching records (the chain), locate the one matching
723 * the requested path component.
725 * The hammer_ip_*() functions merge in-memory records with on-disk
726 * records for the purposes of the search.
731 error
= hammer_ip_first(&cursor
);
733 error
= hammer_ip_resolve_data(&cursor
);
736 if (nlen
== cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
&&
737 bcmp(ncp
->nc_name
, cursor
.data
->entry
.name
, nlen
) == 0) {
738 obj_id
= cursor
.data
->entry
.obj_id
;
741 error
= hammer_ip_next(&cursor
);
744 hammer_done_cursor(&cursor
);
746 ip
= hammer_get_inode(&trans
, &dip
->cache
[1],
747 obj_id
, asof
, flags
, &error
);
749 error
= hammer_get_vnode(ip
, &vp
);
750 hammer_rel_inode(ip
, 0);
756 cache_setvp(ap
->a_nch
, vp
);
759 } else if (error
== ENOENT
) {
760 cache_setvp(ap
->a_nch
, NULL
);
763 hammer_done_transaction(&trans
);
768 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
770 * Locate the parent directory of a directory vnode.
772 * dvp is referenced but not locked. *vpp must be returned referenced and
773 * locked. A parent_obj_id of 0 does not necessarily indicate that we are
774 * at the root, instead it could indicate that the directory we were in was
777 * NOTE: as-of sequences are not linked into the directory structure. If
778 * we are at the root with a different asof than the mount point, reload
779 * the same directory with the mount point's asof. I'm not sure what this
780 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not
781 * get confused, but it hasn't been tested.
785 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args
*ap
)
787 struct hammer_transaction trans
;
788 struct hammer_inode
*dip
;
789 struct hammer_inode
*ip
;
790 int64_t parent_obj_id
;
794 dip
= VTOI(ap
->a_dvp
);
795 asof
= dip
->obj_asof
;
796 parent_obj_id
= dip
->ino_data
.parent_obj_id
;
798 if (parent_obj_id
== 0) {
799 if (dip
->obj_id
== HAMMER_OBJID_ROOT
&&
800 asof
!= dip
->hmp
->asof
) {
801 parent_obj_id
= dip
->obj_id
;
802 asof
= dip
->hmp
->asof
;
803 *ap
->a_fakename
= kmalloc(19, M_TEMP
, M_WAITOK
);
804 ksnprintf(*ap
->a_fakename
, 19, "0x%016llx",
812 hammer_simple_transaction(&trans
, dip
->hmp
);
814 ip
= hammer_get_inode(&trans
, &dip
->cache
[1], parent_obj_id
,
815 asof
, dip
->flags
, &error
);
817 error
= hammer_get_vnode(ip
, ap
->a_vpp
);
818 hammer_rel_inode(ip
, 0);
822 hammer_done_transaction(&trans
);
827 * hammer_vop_nlink { nch, dvp, vp, cred }
831 hammer_vop_nlink(struct vop_nlink_args
*ap
)
833 struct hammer_transaction trans
;
834 struct hammer_inode
*dip
;
835 struct hammer_inode
*ip
;
836 struct nchandle
*nch
;
840 dip
= VTOI(ap
->a_dvp
);
843 if (dip
->flags
& HAMMER_INODE_RO
)
845 if (ip
->flags
& HAMMER_INODE_RO
)
847 if ((error
= hammer_checkspace(dip
->hmp
)) != 0)
851 * Create a transaction to cover the operations we perform.
853 hammer_start_transaction(&trans
, dip
->hmp
);
856 * Add the filesystem object to the directory. Note that neither
857 * dip nor ip are referenced or locked, but their vnodes are
858 * referenced. This function will bump the inode's link count.
860 error
= hammer_ip_add_directory(&trans
, dip
, nch
->ncp
, ip
);
866 cache_setunresolved(nch
);
867 cache_setvp(nch
, ap
->a_vp
);
869 hammer_done_transaction(&trans
);
874 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
876 * The operating system has already ensured that the directory entry
877 * does not exist and done all appropriate namespace locking.
881 hammer_vop_nmkdir(struct vop_nmkdir_args
*ap
)
883 struct hammer_transaction trans
;
884 struct hammer_inode
*dip
;
885 struct hammer_inode
*nip
;
886 struct nchandle
*nch
;
890 dip
= VTOI(ap
->a_dvp
);
892 if (dip
->flags
& HAMMER_INODE_RO
)
894 if ((error
= hammer_checkspace(dip
->hmp
)) != 0)
898 * Create a transaction to cover the operations we perform.
900 hammer_start_transaction(&trans
, dip
->hmp
);
903 * Create a new filesystem object of the requested type. The
904 * returned inode will be referenced but not locked.
906 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
, dip
, &nip
);
908 hkprintf("hammer_mkdir error %d\n", error
);
909 hammer_done_transaction(&trans
);
914 * Add the new filesystem object to the directory. This will also
915 * bump the inode's link count.
917 error
= hammer_ip_add_directory(&trans
, dip
, nch
->ncp
, nip
);
919 hkprintf("hammer_mkdir (add) error %d\n", error
);
925 hammer_rel_inode(nip
, 0);
928 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
929 hammer_rel_inode(nip
, 0);
931 cache_setunresolved(ap
->a_nch
);
932 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
935 hammer_done_transaction(&trans
);
940 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
942 * The operating system has already ensured that the directory entry
943 * does not exist and done all appropriate namespace locking.
947 hammer_vop_nmknod(struct vop_nmknod_args
*ap
)
949 struct hammer_transaction trans
;
950 struct hammer_inode
*dip
;
951 struct hammer_inode
*nip
;
952 struct nchandle
*nch
;
956 dip
= VTOI(ap
->a_dvp
);
958 if (dip
->flags
& HAMMER_INODE_RO
)
960 if ((error
= hammer_checkspace(dip
->hmp
)) != 0)
964 * Create a transaction to cover the operations we perform.
966 hammer_start_transaction(&trans
, dip
->hmp
);
969 * Create a new filesystem object of the requested type. The
970 * returned inode will be referenced but not locked.
972 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
, dip
, &nip
);
974 hammer_done_transaction(&trans
);
980 * Add the new filesystem object to the directory. This will also
981 * bump the inode's link count.
983 error
= hammer_ip_add_directory(&trans
, dip
, nch
->ncp
, nip
);
989 hammer_rel_inode(nip
, 0);
992 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
993 hammer_rel_inode(nip
, 0);
995 cache_setunresolved(ap
->a_nch
);
996 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
999 hammer_done_transaction(&trans
);
1004 * hammer_vop_open { vp, mode, cred, fp }
1008 hammer_vop_open(struct vop_open_args
*ap
)
1012 ip
= VTOI(ap
->a_vp
);
1014 if ((ap
->a_mode
& FWRITE
) && (ip
->flags
& HAMMER_INODE_RO
))
1016 return(vop_stdopen(ap
));
1020 * hammer_vop_pathconf { vp, name, retval }
1024 hammer_vop_pathconf(struct vop_pathconf_args
*ap
)
1030 * hammer_vop_print { vp }
1034 hammer_vop_print(struct vop_print_args
*ap
)
1040 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
1044 hammer_vop_readdir(struct vop_readdir_args
*ap
)
1046 struct hammer_transaction trans
;
1047 struct hammer_cursor cursor
;
1048 struct hammer_inode
*ip
;
1050 hammer_base_elm_t base
;
1058 ip
= VTOI(ap
->a_vp
);
1060 saveoff
= uio
->uio_offset
;
1062 if (ap
->a_ncookies
) {
1063 ncookies
= uio
->uio_resid
/ 16 + 1;
1064 if (ncookies
> 1024)
1066 cookies
= kmalloc(ncookies
* sizeof(off_t
), M_TEMP
, M_WAITOK
);
1074 hammer_simple_transaction(&trans
, ip
->hmp
);
1077 * Handle artificial entries
1081 r
= vop_write_dirent(&error
, uio
, ip
->obj_id
, DT_DIR
, 1, ".");
1085 cookies
[cookie_index
] = saveoff
;
1088 if (cookie_index
== ncookies
)
1092 if (ip
->ino_data
.parent_obj_id
) {
1093 r
= vop_write_dirent(&error
, uio
,
1094 ip
->ino_data
.parent_obj_id
,
1097 r
= vop_write_dirent(&error
, uio
,
1098 ip
->obj_id
, DT_DIR
, 2, "..");
1103 cookies
[cookie_index
] = saveoff
;
1106 if (cookie_index
== ncookies
)
1111 * Key range (begin and end inclusive) to scan. Directory keys
1112 * directly translate to a 64 bit 'seek' position.
1114 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[0], ip
);
1115 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_MISC
;
1116 cursor
.key_beg
.obj_id
= ip
->obj_id
;
1117 cursor
.key_beg
.create_tid
= 0;
1118 cursor
.key_beg
.delete_tid
= 0;
1119 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
1120 cursor
.key_beg
.obj_type
= 0;
1121 cursor
.key_beg
.key
= saveoff
;
1123 cursor
.key_end
= cursor
.key_beg
;
1124 cursor
.key_end
.key
= HAMMER_MAX_KEY
;
1125 cursor
.asof
= ip
->obj_asof
;
1126 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
1128 error
= hammer_ip_first(&cursor
);
1130 while (error
== 0) {
1131 error
= hammer_ip_resolve_data(&cursor
);
1134 base
= &cursor
.leaf
->base
;
1135 saveoff
= base
->key
;
1136 KKASSERT(cursor
.leaf
->data_len
> HAMMER_ENTRY_NAME_OFF
);
1138 if (base
->obj_id
!= ip
->obj_id
)
1139 panic("readdir: bad record at %p", cursor
.node
);
1141 r
= vop_write_dirent(
1142 &error
, uio
, cursor
.data
->entry
.obj_id
,
1143 hammer_get_dtype(cursor
.leaf
->base
.obj_type
),
1144 cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
,
1145 (void *)cursor
.data
->entry
.name
);
1150 cookies
[cookie_index
] = base
->key
;
1152 if (cookie_index
== ncookies
)
1154 error
= hammer_ip_next(&cursor
);
1156 hammer_done_cursor(&cursor
);
1159 hammer_done_transaction(&trans
);
1162 *ap
->a_eofflag
= (error
== ENOENT
);
1163 uio
->uio_offset
= saveoff
;
1164 if (error
&& cookie_index
== 0) {
1165 if (error
== ENOENT
)
1168 kfree(cookies
, M_TEMP
);
1169 *ap
->a_ncookies
= 0;
1170 *ap
->a_cookies
= NULL
;
1173 if (error
== ENOENT
)
1176 *ap
->a_ncookies
= cookie_index
;
1177 *ap
->a_cookies
= cookies
;
1184 * hammer_vop_readlink { vp, uio, cred }
1188 hammer_vop_readlink(struct vop_readlink_args
*ap
)
1190 struct hammer_transaction trans
;
1191 struct hammer_cursor cursor
;
1192 struct hammer_inode
*ip
;
1195 ip
= VTOI(ap
->a_vp
);
1198 * Shortcut if the symlink data was stuffed into ino_data.
1200 if (ip
->ino_data
.size
<= HAMMER_INODE_BASESYMLEN
) {
1201 error
= uiomove(ip
->ino_data
.ext
.symlink
,
1202 ip
->ino_data
.size
, ap
->a_uio
);
1209 hammer_simple_transaction(&trans
, ip
->hmp
);
1210 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[0], ip
);
1213 * Key range (begin and end inclusive) to scan. Directory keys
1214 * directly translate to a 64 bit 'seek' position.
1216 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_MISC
; /* XXX */
1217 cursor
.key_beg
.obj_id
= ip
->obj_id
;
1218 cursor
.key_beg
.create_tid
= 0;
1219 cursor
.key_beg
.delete_tid
= 0;
1220 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_FIX
;
1221 cursor
.key_beg
.obj_type
= 0;
1222 cursor
.key_beg
.key
= HAMMER_FIXKEY_SYMLINK
;
1223 cursor
.asof
= ip
->obj_asof
;
1224 cursor
.flags
|= HAMMER_CURSOR_ASOF
;
1226 error
= hammer_ip_lookup(&cursor
);
1228 error
= hammer_ip_resolve_data(&cursor
);
1230 KKASSERT(cursor
.leaf
->data_len
>=
1231 HAMMER_SYMLINK_NAME_OFF
);
1232 error
= uiomove(cursor
.data
->symlink
.name
,
1233 cursor
.leaf
->data_len
-
1234 HAMMER_SYMLINK_NAME_OFF
,
1238 hammer_done_cursor(&cursor
);
1239 hammer_done_transaction(&trans
);
1244 * hammer_vop_nremove { nch, dvp, cred }
1248 hammer_vop_nremove(struct vop_nremove_args
*ap
)
1250 struct hammer_transaction trans
;
1251 struct hammer_inode
*dip
;
1254 dip
= VTOI(ap
->a_dvp
);
1256 if (hammer_nohistory(dip
) == 0 &&
1257 (error
= hammer_checkspace(dip
->hmp
)) != 0) {
1261 hammer_start_transaction(&trans
, dip
->hmp
);
1262 error
= hammer_dounlink(&trans
, ap
->a_nch
, ap
->a_dvp
, ap
->a_cred
, 0);
1263 hammer_done_transaction(&trans
);
1269 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1273 hammer_vop_nrename(struct vop_nrename_args
*ap
)
1275 struct hammer_transaction trans
;
1276 struct namecache
*fncp
;
1277 struct namecache
*tncp
;
1278 struct hammer_inode
*fdip
;
1279 struct hammer_inode
*tdip
;
1280 struct hammer_inode
*ip
;
1281 struct hammer_cursor cursor
;
1285 fdip
= VTOI(ap
->a_fdvp
);
1286 tdip
= VTOI(ap
->a_tdvp
);
1287 fncp
= ap
->a_fnch
->ncp
;
1288 tncp
= ap
->a_tnch
->ncp
;
1289 ip
= VTOI(fncp
->nc_vp
);
1290 KKASSERT(ip
!= NULL
);
1292 if (fdip
->flags
& HAMMER_INODE_RO
)
1294 if (tdip
->flags
& HAMMER_INODE_RO
)
1296 if (ip
->flags
& HAMMER_INODE_RO
)
1298 if ((error
= hammer_checkspace(fdip
->hmp
)) != 0)
1301 hammer_start_transaction(&trans
, fdip
->hmp
);
1304 * Remove tncp from the target directory and then link ip as
1305 * tncp. XXX pass trans to dounlink
1307 * Force the inode sync-time to match the transaction so it is
1308 * in-sync with the creation of the target directory entry.
1310 error
= hammer_dounlink(&trans
, ap
->a_tnch
, ap
->a_tdvp
, ap
->a_cred
, 0);
1311 if (error
== 0 || error
== ENOENT
) {
1312 error
= hammer_ip_add_directory(&trans
, tdip
, tncp
, ip
);
1314 ip
->ino_data
.parent_obj_id
= tdip
->obj_id
;
1315 hammer_modify_inode(ip
, HAMMER_INODE_DDIRTY
);
1319 goto failed
; /* XXX */
1322 * Locate the record in the originating directory and remove it.
1324 * Calculate the namekey and setup the key range for the scan. This
1325 * works kinda like a chained hash table where the lower 32 bits
1326 * of the namekey synthesize the chain.
1328 * The key range is inclusive of both key_beg and key_end.
1330 namekey
= hammer_directory_namekey(fncp
->nc_name
, fncp
->nc_nlen
);
1332 hammer_init_cursor(&trans
, &cursor
, &fdip
->cache
[0], fdip
);
1333 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_MISC
;
1334 cursor
.key_beg
.obj_id
= fdip
->obj_id
;
1335 cursor
.key_beg
.key
= namekey
;
1336 cursor
.key_beg
.create_tid
= 0;
1337 cursor
.key_beg
.delete_tid
= 0;
1338 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
1339 cursor
.key_beg
.obj_type
= 0;
1341 cursor
.key_end
= cursor
.key_beg
;
1342 cursor
.key_end
.key
|= 0xFFFFFFFFULL
;
1343 cursor
.asof
= fdip
->obj_asof
;
1344 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
1347 * Scan all matching records (the chain), locate the one matching
1348 * the requested path component.
1350 * The hammer_ip_*() functions merge in-memory records with on-disk
1351 * records for the purposes of the search.
1353 error
= hammer_ip_first(&cursor
);
1354 while (error
== 0) {
1355 if (hammer_ip_resolve_data(&cursor
) != 0)
1357 nlen
= cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
;
1359 if (fncp
->nc_nlen
== nlen
&&
1360 bcmp(fncp
->nc_name
, cursor
.data
->entry
.name
, nlen
) == 0) {
1363 error
= hammer_ip_next(&cursor
);
1367 * If all is ok we have to get the inode so we can adjust nlinks.
1369 * WARNING: hammer_ip_del_directory() may have to terminate the
1370 * cursor to avoid a recursion. It's ok to call hammer_done_cursor()
1374 error
= hammer_ip_del_directory(&trans
, &cursor
, fdip
, ip
);
1377 * XXX A deadlock here will break rename's atomicity for the purposes
1378 * of crash recovery.
1380 if (error
== EDEADLK
) {
1381 hammer_done_cursor(&cursor
);
1386 * Cleanup and tell the kernel that the rename succeeded.
1388 hammer_done_cursor(&cursor
);
1390 cache_rename(ap
->a_fnch
, ap
->a_tnch
);
1393 hammer_done_transaction(&trans
);
1398 * hammer_vop_nrmdir { nch, dvp, cred }
1402 hammer_vop_nrmdir(struct vop_nrmdir_args
*ap
)
1404 struct hammer_transaction trans
;
1405 struct hammer_inode
*dip
;
1408 dip
= VTOI(ap
->a_dvp
);
1410 if (hammer_nohistory(dip
) == 0 &&
1411 (error
= hammer_checkspace(dip
->hmp
)) != 0) {
1415 hammer_start_transaction(&trans
, dip
->hmp
);
1416 error
= hammer_dounlink(&trans
, ap
->a_nch
, ap
->a_dvp
, ap
->a_cred
, 0);
1417 hammer_done_transaction(&trans
);
1423 * hammer_vop_setattr { vp, vap, cred }
1427 hammer_vop_setattr(struct vop_setattr_args
*ap
)
1429 struct hammer_transaction trans
;
1431 struct hammer_inode
*ip
;
1439 ip
= ap
->a_vp
->v_data
;
1442 if (ap
->a_vp
->v_mount
->mnt_flag
& MNT_RDONLY
)
1444 if (ip
->flags
& HAMMER_INODE_RO
)
1446 if (hammer_nohistory(ip
) == 0 &&
1447 (error
= hammer_checkspace(ip
->hmp
)) != 0) {
1451 hammer_start_transaction(&trans
, ip
->hmp
);
1454 if (vap
->va_flags
!= VNOVAL
) {
1455 flags
= ip
->ino_data
.uflags
;
1456 error
= vop_helper_setattr_flags(&flags
, vap
->va_flags
,
1457 hammer_to_unix_xid(&ip
->ino_data
.uid
),
1460 if (ip
->ino_data
.uflags
!= flags
) {
1461 ip
->ino_data
.uflags
= flags
;
1462 modflags
|= HAMMER_INODE_DDIRTY
;
1464 if (ip
->ino_data
.uflags
& (IMMUTABLE
| APPEND
)) {
1471 if (ip
->ino_data
.uflags
& (IMMUTABLE
| APPEND
)) {
1475 if (vap
->va_uid
!= (uid_t
)VNOVAL
|| vap
->va_gid
!= (gid_t
)VNOVAL
) {
1476 mode_t cur_mode
= ip
->ino_data
.mode
;
1477 uid_t cur_uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
1478 gid_t cur_gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
1482 error
= vop_helper_chown(ap
->a_vp
, vap
->va_uid
, vap
->va_gid
,
1484 &cur_uid
, &cur_gid
, &cur_mode
);
1486 hammer_guid_to_uuid(&uuid_uid
, cur_uid
);
1487 hammer_guid_to_uuid(&uuid_gid
, cur_gid
);
1488 if (bcmp(&uuid_uid
, &ip
->ino_data
.uid
,
1489 sizeof(uuid_uid
)) ||
1490 bcmp(&uuid_gid
, &ip
->ino_data
.gid
,
1491 sizeof(uuid_gid
)) ||
1492 ip
->ino_data
.mode
!= cur_mode
1494 ip
->ino_data
.uid
= uuid_uid
;
1495 ip
->ino_data
.gid
= uuid_gid
;
1496 ip
->ino_data
.mode
= cur_mode
;
1498 modflags
|= HAMMER_INODE_DDIRTY
;
1501 while (vap
->va_size
!= VNOVAL
&& ip
->ino_data
.size
!= vap
->va_size
) {
1502 switch(ap
->a_vp
->v_type
) {
1504 if (vap
->va_size
== ip
->ino_data
.size
)
1507 * XXX break atomicity, we can deadlock the backend
1508 * if we do not release the lock. Probably not a
1511 if (vap
->va_size
< ip
->ino_data
.size
) {
1512 vtruncbuf(ap
->a_vp
, vap
->va_size
,
1516 vnode_pager_setsize(ap
->a_vp
, vap
->va_size
);
1519 ip
->ino_data
.size
= vap
->va_size
;
1520 modflags
|= HAMMER_INODE_DDIRTY
;
1521 aligned_size
= (vap
->va_size
+ HAMMER_BUFMASK
) &
1525 * on-media truncation is cached in the inode until
1526 * the inode is synchronized.
1529 hammer_ip_frontend_trunc(ip
, vap
->va_size
);
1530 hammer_update_rsv_databufs(ip
);
1531 #ifdef DEBUG_TRUNCATE
1532 if (HammerTruncIp
== NULL
)
1535 if ((ip
->flags
& HAMMER_INODE_TRUNCATED
) == 0) {
1536 ip
->flags
|= HAMMER_INODE_TRUNCATED
;
1537 ip
->trunc_off
= vap
->va_size
;
1538 #ifdef DEBUG_TRUNCATE
1539 if (ip
== HammerTruncIp
)
1540 kprintf("truncate1 %016llx\n", ip
->trunc_off
);
1542 } else if (ip
->trunc_off
> vap
->va_size
) {
1543 ip
->trunc_off
= vap
->va_size
;
1544 #ifdef DEBUG_TRUNCATE
1545 if (ip
== HammerTruncIp
)
1546 kprintf("truncate2 %016llx\n", ip
->trunc_off
);
1549 #ifdef DEBUG_TRUNCATE
1550 if (ip
== HammerTruncIp
)
1551 kprintf("truncate3 %016llx (ignored)\n", vap
->va_size
);
1557 * If truncating we have to clean out a portion of
1558 * the last block on-disk. We do this in the
1559 * front-end buffer cache.
1561 if (truncating
&& vap
->va_size
< aligned_size
) {
1565 aligned_size
-= HAMMER_BUFSIZE
;
1567 offset
= vap
->va_size
& HAMMER_BUFMASK
;
1568 error
= bread(ap
->a_vp
, aligned_size
,
1569 HAMMER_BUFSIZE
, &bp
);
1570 hammer_ip_frontend_trunc(ip
, aligned_size
);
1572 bzero(bp
->b_data
+ offset
,
1573 HAMMER_BUFSIZE
- offset
);
1576 kprintf("ERROR %d\n", error
);
1582 if ((ip
->flags
& HAMMER_INODE_TRUNCATED
) == 0) {
1583 ip
->flags
|= HAMMER_INODE_TRUNCATED
;
1584 ip
->trunc_off
= vap
->va_size
;
1585 } else if (ip
->trunc_off
> vap
->va_size
) {
1586 ip
->trunc_off
= vap
->va_size
;
1588 hammer_ip_frontend_trunc(ip
, vap
->va_size
);
1589 ip
->ino_data
.size
= vap
->va_size
;
1590 modflags
|= HAMMER_INODE_DDIRTY
;
1598 if (vap
->va_atime
.tv_sec
!= VNOVAL
) {
1599 ip
->ino_leaf
.atime
=
1600 hammer_timespec_to_transid(&vap
->va_atime
);
1601 modflags
|= HAMMER_INODE_ITIMES
;
1603 if (vap
->va_mtime
.tv_sec
!= VNOVAL
) {
1604 ip
->ino_data
.mtime
=
1605 hammer_timespec_to_transid(&vap
->va_mtime
);
1606 modflags
|= HAMMER_INODE_ITIMES
;
1607 modflags
|= HAMMER_INODE_DDIRTY
; /* XXX mtime */
1609 if (vap
->va_mode
!= (mode_t
)VNOVAL
) {
1610 mode_t cur_mode
= ip
->ino_data
.mode
;
1611 uid_t cur_uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
1612 gid_t cur_gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
1614 error
= vop_helper_chmod(ap
->a_vp
, vap
->va_mode
, ap
->a_cred
,
1615 cur_uid
, cur_gid
, &cur_mode
);
1616 if (error
== 0 && ip
->ino_data
.mode
!= cur_mode
) {
1617 ip
->ino_data
.mode
= cur_mode
;
1618 modflags
|= HAMMER_INODE_DDIRTY
;
1623 hammer_modify_inode(ip
, modflags
);
1624 hammer_done_transaction(&trans
);
1629 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1633 hammer_vop_nsymlink(struct vop_nsymlink_args
*ap
)
1635 struct hammer_transaction trans
;
1636 struct hammer_inode
*dip
;
1637 struct hammer_inode
*nip
;
1638 struct nchandle
*nch
;
1639 hammer_record_t record
;
1643 ap
->a_vap
->va_type
= VLNK
;
1646 dip
= VTOI(ap
->a_dvp
);
1648 if (dip
->flags
& HAMMER_INODE_RO
)
1650 if ((error
= hammer_checkspace(dip
->hmp
)) != 0)
1654 * Create a transaction to cover the operations we perform.
1656 hammer_start_transaction(&trans
, dip
->hmp
);
1659 * Create a new filesystem object of the requested type. The
1660 * returned inode will be referenced but not locked.
1663 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
, dip
, &nip
);
1665 hammer_done_transaction(&trans
);
1671 * Add a record representing the symlink. symlink stores the link
1672 * as pure data, not a string, and is not \0 terminated.
1675 bytes
= strlen(ap
->a_target
);
1677 if (bytes
<= HAMMER_INODE_BASESYMLEN
) {
1678 bcopy(ap
->a_target
, nip
->ino_data
.ext
.symlink
, bytes
);
1680 record
= hammer_alloc_mem_record(nip
, bytes
);
1681 record
->type
= HAMMER_MEM_RECORD_GENERAL
;
1683 record
->leaf
.base
.localization
= HAMMER_LOCALIZE_MISC
;
1684 record
->leaf
.base
.key
= HAMMER_FIXKEY_SYMLINK
;
1685 record
->leaf
.base
.rec_type
= HAMMER_RECTYPE_FIX
;
1686 record
->leaf
.data_len
= bytes
;
1687 KKASSERT(HAMMER_SYMLINK_NAME_OFF
== 0);
1688 bcopy(ap
->a_target
, record
->data
->symlink
.name
, bytes
);
1689 error
= hammer_ip_add_record(&trans
, record
);
1693 * Set the file size to the length of the link.
1696 nip
->ino_data
.size
= bytes
;
1697 hammer_modify_inode(nip
, HAMMER_INODE_DDIRTY
);
1701 error
= hammer_ip_add_directory(&trans
, dip
, nch
->ncp
, nip
);
1707 hammer_rel_inode(nip
, 0);
1710 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
1711 hammer_rel_inode(nip
, 0);
1713 cache_setunresolved(ap
->a_nch
);
1714 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
1717 hammer_done_transaction(&trans
);
1722 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1726 hammer_vop_nwhiteout(struct vop_nwhiteout_args
*ap
)
1728 struct hammer_transaction trans
;
1729 struct hammer_inode
*dip
;
1732 dip
= VTOI(ap
->a_dvp
);
1734 if (hammer_nohistory(dip
) == 0 &&
1735 (error
= hammer_checkspace(dip
->hmp
)) != 0) {
1739 hammer_start_transaction(&trans
, dip
->hmp
);
1740 error
= hammer_dounlink(&trans
, ap
->a_nch
, ap
->a_dvp
,
1741 ap
->a_cred
, ap
->a_flags
);
1742 hammer_done_transaction(&trans
);
1748 * hammer_vop_ioctl { vp, command, data, fflag, cred }
1752 hammer_vop_ioctl(struct vop_ioctl_args
*ap
)
1754 struct hammer_inode
*ip
= ap
->a_vp
->v_data
;
1756 return(hammer_ioctl(ip
, ap
->a_command
, ap
->a_data
,
1757 ap
->a_fflag
, ap
->a_cred
));
1762 hammer_vop_mountctl(struct vop_mountctl_args
*ap
)
1767 mp
= ap
->a_head
.a_ops
->head
.vv_mount
;
1770 case MOUNTCTL_SET_EXPORT
:
1771 if (ap
->a_ctllen
!= sizeof(struct export_args
))
1773 error
= hammer_vfs_export(mp
, ap
->a_op
,
1774 (const struct export_args
*)ap
->a_ctl
);
1777 error
= journal_mountctl(ap
);
1784 * hammer_vop_strategy { vp, bio }
1786 * Strategy call, used for regular file read & write only. Note that the
1787 * bp may represent a cluster.
1789 * To simplify operation and allow better optimizations in the future,
1790 * this code does not make any assumptions with regard to buffer alignment
1795 hammer_vop_strategy(struct vop_strategy_args
*ap
)
1800 bp
= ap
->a_bio
->bio_buf
;
1804 error
= hammer_vop_strategy_read(ap
);
1807 error
= hammer_vop_strategy_write(ap
);
1810 bp
->b_error
= error
= EINVAL
;
1811 bp
->b_flags
|= B_ERROR
;
1819 * Read from a regular file. Iterate the related records and fill in the
1820 * BIO/BUF. Gaps are zero-filled.
1822 * The support code in hammer_object.c should be used to deal with mixed
1823 * in-memory and on-disk records.
1829 hammer_vop_strategy_read(struct vop_strategy_args
*ap
)
1831 struct hammer_transaction trans
;
1832 struct hammer_inode
*ip
;
1833 struct hammer_cursor cursor
;
1834 hammer_base_elm_t base
;
1847 ip
= ap
->a_vp
->v_data
;
1849 hammer_simple_transaction(&trans
, ip
->hmp
);
1850 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[1], ip
);
1853 * Key range (begin and end inclusive) to scan. Note that the key's
1854 * stored in the actual records represent BASE+LEN, not BASE. The
1855 * first record containing bio_offset will have a key > bio_offset.
1857 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_MISC
;
1858 cursor
.key_beg
.obj_id
= ip
->obj_id
;
1859 cursor
.key_beg
.create_tid
= 0;
1860 cursor
.key_beg
.delete_tid
= 0;
1861 cursor
.key_beg
.obj_type
= 0;
1862 cursor
.key_beg
.key
= bio
->bio_offset
+ 1;
1863 cursor
.asof
= ip
->obj_asof
;
1864 cursor
.flags
|= HAMMER_CURSOR_ASOF
| HAMMER_CURSOR_DATAEXTOK
;
1866 cursor
.key_end
= cursor
.key_beg
;
1867 KKASSERT(ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_REGFILE
);
1869 if (ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_DBFILE
) {
1870 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DB
;
1871 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DB
;
1872 cursor
.key_end
.key
= 0x7FFFFFFFFFFFFFFFLL
;
1876 ran_end
= bio
->bio_offset
+ bp
->b_bufsize
;
1877 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DATA
;
1878 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DATA
;
1879 tmp64
= ran_end
+ MAXPHYS
+ 1; /* work-around GCC-4 bug */
1880 if (tmp64
< ran_end
)
1881 cursor
.key_end
.key
= 0x7FFFFFFFFFFFFFFFLL
;
1883 cursor
.key_end
.key
= ran_end
+ MAXPHYS
+ 1;
1885 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
1887 error
= hammer_ip_first(&cursor
);
1890 while (error
== 0) {
1892 * Get the base file offset of the record. The key for
1893 * data records is (base + bytes) rather than (base).
1895 base
= &cursor
.leaf
->base
;
1896 rec_offset
= base
->key
- cursor
.leaf
->data_len
;
1899 * Calculate the gap, if any, and zero-fill it.
1901 * n is the offset of the start of the record versus our
1902 * current seek offset in the bio.
1904 n
= (int)(rec_offset
- (bio
->bio_offset
+ boff
));
1906 if (n
> bp
->b_bufsize
- boff
)
1907 n
= bp
->b_bufsize
- boff
;
1908 bzero((char *)bp
->b_data
+ boff
, n
);
1914 * Calculate the data offset in the record and the number
1915 * of bytes we can copy.
1917 * There are two degenerate cases. First, boff may already
1918 * be at bp->b_bufsize. Secondly, the data offset within
1919 * the record may exceed the record's size.
1923 n
= cursor
.leaf
->data_len
- roff
;
1925 kprintf("strategy_read: bad n=%d roff=%d\n", n
, roff
);
1927 } else if (n
> bp
->b_bufsize
- boff
) {
1928 n
= bp
->b_bufsize
- boff
;
1932 * Deal with cached truncations. This cool bit of code
1933 * allows truncate()/ftruncate() to avoid having to sync
1936 * If the frontend is truncated then all backend records are
1937 * subject to the frontend's truncation.
1939 * If the backend is truncated then backend records on-disk
1940 * (but not in-memory) are subject to the backend's
1941 * truncation. In-memory records owned by the backend
1942 * represent data written after the truncation point on the
1943 * backend and must not be truncated.
1945 * Truncate operations deal with frontend buffer cache
1946 * buffers and frontend-owned in-memory records synchronously.
1948 if (ip
->flags
& HAMMER_INODE_TRUNCATED
) {
1949 if (hammer_cursor_ondisk(&cursor
) ||
1950 cursor
.iprec
->flush_state
== HAMMER_FST_FLUSH
) {
1951 if (ip
->trunc_off
<= rec_offset
)
1953 else if (ip
->trunc_off
< rec_offset
+ n
)
1954 n
= (int)(ip
->trunc_off
- rec_offset
);
1957 if (ip
->sync_flags
& HAMMER_INODE_TRUNCATED
) {
1958 if (hammer_cursor_ondisk(&cursor
)) {
1959 if (ip
->sync_trunc_off
<= rec_offset
)
1961 else if (ip
->sync_trunc_off
< rec_offset
+ n
)
1962 n
= (int)(ip
->sync_trunc_off
- rec_offset
);
1967 * Try to issue a direct read into our bio if possible,
1968 * otherwise resolve the element data into a hammer_buffer
1971 * WARNING: If we hit the else clause.
1973 if (roff
== 0 && boff
== 0 && n
== bp
->b_bufsize
&&
1974 (rec_offset
& HAMMER_BUFMASK
) == 0) {
1975 error
= hammer_io_direct_read(trans
.hmp
, cursor
.leaf
,
1979 error
= hammer_ip_resolve_data(&cursor
);
1981 bcopy((char *)cursor
.data
+ roff
,
1982 (char *)bp
->b_data
+ boff
, n
);
1989 * Iterate until we have filled the request.
1992 if (boff
== bp
->b_bufsize
)
1994 error
= hammer_ip_next(&cursor
);
1998 * There may have been a gap after the last record
2000 if (error
== ENOENT
)
2002 if (error
== 0 && boff
!= bp
->b_bufsize
) {
2003 KKASSERT(boff
< bp
->b_bufsize
);
2004 bzero((char *)bp
->b_data
+ boff
, bp
->b_bufsize
- boff
);
2005 /* boff = bp->b_bufsize; */
2008 bp
->b_error
= error
;
2010 bp
->b_flags
|= B_ERROR
;
2015 hammer_cache_node(cursor
.node
, &ip
->cache
[1]);
2016 hammer_done_cursor(&cursor
);
2017 hammer_done_transaction(&trans
);
2022 * Write to a regular file. Because this is a strategy call the OS is
2023 * trying to actually sync data to the media. HAMMER can only flush
2024 * the entire inode (so the TID remains properly synchronized).
2026 * Basically all we do here is place the bio on the inode's flush queue
2027 * and activate the flusher.
2031 hammer_vop_strategy_write(struct vop_strategy_args
*ap
)
2033 hammer_record_t record
;
2042 ip
= ap
->a_vp
->v_data
;
2044 if (ip
->flags
& HAMMER_INODE_RO
) {
2045 bp
->b_error
= EROFS
;
2046 bp
->b_flags
|= B_ERROR
;
2048 hammer_cleanup_write_io(ip
);
2053 * Interlock with inode destruction (no in-kernel or directory
2054 * topology visibility). If we queue new IO while trying to
2055 * destroy the inode we can deadlock the vtrunc call in
2056 * hammer_inode_unloadable_check().
2058 if (ip
->flags
& (HAMMER_INODE_DELETING
|HAMMER_INODE_DELETED
)) {
2061 hammer_cleanup_write_io(ip
);
2066 * Attempt to reserve space and issue a direct-write from the
2067 * front-end. If we can't we will queue the BIO to the flusher.
2068 * The bulk/direct-write code will still bcopy if writing less
2069 * than full-sized blocks (at the end of a file).
2071 * If we can the I/O can be issued and an in-memory record will
2072 * be installed to reference the storage until the flusher can get to
2075 * Since we own the high level bio the front-end will not try to
2076 * do a direct-read until the write completes.
2078 KKASSERT((bio
->bio_offset
& HAMMER_BUFMASK
) == 0);
2079 KKASSERT(bio
->bio_offset
< ip
->ino_data
.size
);
2080 if (bio
->bio_offset
+ bp
->b_bufsize
<= ip
->ino_data
.size
)
2081 bytes
= bp
->b_bufsize
;
2083 bytes
= (int)(ip
->ino_data
.size
- bio
->bio_offset
);
2085 record
= hammer_ip_add_bulk(ip
, bio
->bio_offset
, bp
->b_data
,
2088 hammer_io_direct_write(ip
->hmp
, &record
->leaf
, bio
);
2089 hammer_rel_mem_record(record
);
2090 if (ip
->rsv_recs
> hammer_limit_irecs
/ 2)
2091 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
2093 hammer_flush_inode(ip
, 0);
2095 bp
->b_error
= error
;
2096 bp
->b_flags
|= B_ERROR
;
2099 hammer_cleanup_write_io(ip
);
2104 * Clean-up after disposing of a dirty frontend buffer's data.
2105 * This is somewhat heuristical so try to be robust.
2108 hammer_cleanup_write_io(hammer_inode_t ip
)
2110 if (ip
->rsv_databufs
) {
2112 --ip
->hmp
->rsv_databufs
;
2117 * We can lose track of dirty buffer cache buffers if we truncate, this
2118 * routine will resynchronize the count.
2122 hammer_update_rsv_databufs(hammer_inode_t ip
)
2130 RB_FOREACH(bp
, buf_rb_tree
, &ip
->vp
->v_rbdirty_tree
) {
2136 delta
= n
- ip
->rsv_databufs
;
2137 ip
->rsv_databufs
+= delta
;
2138 ip
->hmp
->rsv_databufs
+= delta
;
2142 * dounlink - disconnect a directory entry
2144 * XXX whiteout support not really in yet
2147 hammer_dounlink(hammer_transaction_t trans
, struct nchandle
*nch
,
2148 struct vnode
*dvp
, struct ucred
*cred
, int flags
)
2150 struct namecache
*ncp
;
2153 struct hammer_cursor cursor
;
2158 * Calculate the namekey and setup the key range for the scan. This
2159 * works kinda like a chained hash table where the lower 32 bits
2160 * of the namekey synthesize the chain.
2162 * The key range is inclusive of both key_beg and key_end.
2167 if (dip
->flags
& HAMMER_INODE_RO
)
2170 namekey
= hammer_directory_namekey(ncp
->nc_name
, ncp
->nc_nlen
);
2172 hammer_init_cursor(trans
, &cursor
, &dip
->cache
[0], dip
);
2173 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_MISC
;
2174 cursor
.key_beg
.obj_id
= dip
->obj_id
;
2175 cursor
.key_beg
.key
= namekey
;
2176 cursor
.key_beg
.create_tid
= 0;
2177 cursor
.key_beg
.delete_tid
= 0;
2178 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
2179 cursor
.key_beg
.obj_type
= 0;
2181 cursor
.key_end
= cursor
.key_beg
;
2182 cursor
.key_end
.key
|= 0xFFFFFFFFULL
;
2183 cursor
.asof
= dip
->obj_asof
;
2184 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
2187 * Scan all matching records (the chain), locate the one matching
2188 * the requested path component. info->last_error contains the
2189 * error code on search termination and could be 0, ENOENT, or
2192 * The hammer_ip_*() functions merge in-memory records with on-disk
2193 * records for the purposes of the search.
2195 error
= hammer_ip_first(&cursor
);
2197 while (error
== 0) {
2198 error
= hammer_ip_resolve_data(&cursor
);
2201 nlen
= cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
;
2203 if (ncp
->nc_nlen
== nlen
&&
2204 bcmp(ncp
->nc_name
, cursor
.data
->entry
.name
, nlen
) == 0) {
2207 error
= hammer_ip_next(&cursor
);
2211 * If all is ok we have to get the inode so we can adjust nlinks.
2213 * If the target is a directory, it must be empty.
2216 ip
= hammer_get_inode(trans
, &dip
->cache
[1],
2217 cursor
.data
->entry
.obj_id
,
2218 dip
->hmp
->asof
, 0, &error
);
2219 if (error
== ENOENT
) {
2220 kprintf("obj_id %016llx\n", cursor
.data
->entry
.obj_id
);
2221 Debugger("ENOENT unlinking object that should exist");
2225 * If we are trying to remove a directory the directory must
2228 * WARNING: hammer_ip_check_directory_empty() may have to
2229 * terminate the cursor to avoid a deadlock. It is ok to
2230 * call hammer_done_cursor() twice.
2232 if (error
== 0 && ip
->ino_data
.obj_type
==
2233 HAMMER_OBJTYPE_DIRECTORY
) {
2234 error
= hammer_ip_check_directory_empty(trans
, ip
);
2238 * Delete the directory entry.
2240 * WARNING: hammer_ip_del_directory() may have to terminate
2241 * the cursor to avoid a deadlock. It is ok to call
2242 * hammer_done_cursor() twice.
2245 error
= hammer_ip_del_directory(trans
, &cursor
,
2249 cache_setunresolved(nch
);
2250 cache_setvp(nch
, NULL
);
2253 cache_inval_vp(ip
->vp
, CINV_DESTROY
);
2255 hammer_rel_inode(ip
, 0);
2257 hammer_done_cursor(&cursor
);
2258 if (error
== EDEADLK
)
2264 /************************************************************************
2265 * FIFO AND SPECFS OPS *
2266 ************************************************************************
2271 hammer_vop_fifoclose (struct vop_close_args
*ap
)
2273 /* XXX update itimes */
2274 return (VOCALL(&fifo_vnode_vops
, &ap
->a_head
));
2278 hammer_vop_fiforead (struct vop_read_args
*ap
)
2282 error
= VOCALL(&fifo_vnode_vops
, &ap
->a_head
);
2283 /* XXX update access time */
2288 hammer_vop_fifowrite (struct vop_write_args
*ap
)
2292 error
= VOCALL(&fifo_vnode_vops
, &ap
->a_head
);
2293 /* XXX update access time */
2298 hammer_vop_specclose (struct vop_close_args
*ap
)
2300 /* XXX update itimes */
2301 return (VOCALL(&spec_vnode_vops
, &ap
->a_head
));
2305 hammer_vop_specread (struct vop_read_args
*ap
)
2307 /* XXX update access time */
2308 return (VOCALL(&spec_vnode_vops
, &ap
->a_head
));
2312 hammer_vop_specwrite (struct vop_write_args
*ap
)
2314 /* XXX update last change time */
2315 return (VOCALL(&spec_vnode_vops
, &ap
->a_head
));