2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.102 2008/10/16 17:24:16 dillon Exp $
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
46 #include <sys/dirent.h>
48 #include <vm/vm_extern.h>
49 #include <vfs/fifofs/fifo.h>
55 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
/*
 * Forward declarations of the file-local (static) vnode operation
 * handlers.  Each takes a pointer to the argument structure of the
 * operation it implements and returns an int.  The first group is the
 * set referenced by the hammer_vnode_vops table.
 */
56 static int hammer_vop_fsync(struct vop_fsync_args
*);
57 static int hammer_vop_read(struct vop_read_args
*);
58 static int hammer_vop_write(struct vop_write_args
*);
59 static int hammer_vop_access(struct vop_access_args
*);
60 static int hammer_vop_advlock(struct vop_advlock_args
*);
61 static int hammer_vop_close(struct vop_close_args
*);
62 static int hammer_vop_ncreate(struct vop_ncreate_args
*);
63 static int hammer_vop_getattr(struct vop_getattr_args
*);
64 static int hammer_vop_nresolve(struct vop_nresolve_args
*);
65 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args
*);
66 static int hammer_vop_nlink(struct vop_nlink_args
*);
67 static int hammer_vop_nmkdir(struct vop_nmkdir_args
*);
68 static int hammer_vop_nmknod(struct vop_nmknod_args
*);
69 static int hammer_vop_open(struct vop_open_args
*);
70 static int hammer_vop_print(struct vop_print_args
*);
71 static int hammer_vop_readdir(struct vop_readdir_args
*);
72 static int hammer_vop_readlink(struct vop_readlink_args
*);
73 static int hammer_vop_nremove(struct vop_nremove_args
*);
74 static int hammer_vop_nrename(struct vop_nrename_args
*);
75 static int hammer_vop_nrmdir(struct vop_nrmdir_args
*);
76 static int hammer_vop_markatime(struct vop_markatime_args
*);
77 static int hammer_vop_setattr(struct vop_setattr_args
*);
78 static int hammer_vop_strategy(struct vop_strategy_args
*);
79 static int hammer_vop_bmap(struct vop_bmap_args
*ap
);
80 static int hammer_vop_nsymlink(struct vop_nsymlink_args
*);
81 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args
*);
82 static int hammer_vop_ioctl(struct vop_ioctl_args
*);
83 static int hammer_vop_mountctl(struct vop_mountctl_args
*);
84 static int hammer_vop_kqfilter (struct vop_kqfilter_args
*);
/*
 * Handlers referenced only by the hammer_fifo_vops table.  They reuse the
 * generic close/read/write/kqfilter argument structures.
 */
86 static int hammer_vop_fifoclose (struct vop_close_args
*);
87 static int hammer_vop_fiforead (struct vop_read_args
*);
88 static int hammer_vop_fifowrite (struct vop_write_args
*);
89 static int hammer_vop_fifokqfilter (struct vop_kqfilter_args
*);
/*
 * Handlers referenced only by the hammer_spec_vops table.
 */
91 static int hammer_vop_specclose (struct vop_close_args
*);
92 static int hammer_vop_specread (struct vop_read_args
*);
93 static int hammer_vop_specwrite (struct vop_write_args
*);
/*
 * Operations vector for ordinary HAMMER vnodes.
 *
 * Most slots point at the static hammer_vop_*() handlers of this file;
 * getpages, putpages and pathconf use the generic vop_std*() routines and
 * .vop_default is vop_defaultop (presumably the fallback for operations
 * not listed here -- confirm against the VFS layer).
 *
 * hammer_vop_inactive and hammer_vop_reclaim have no prototype in this
 * part of the file, so they are presumably declared in a HAMMER header --
 * confirm.
 *
 * NOTE(review): the closing "};" of this initializer is not visible in
 * this listing (the embedded numbering skips 131-132).
 */
95 struct vop_ops hammer_vnode_vops
= {
96 .vop_default
= vop_defaultop
,
97 .vop_fsync
= hammer_vop_fsync
,
98 .vop_getpages
= vop_stdgetpages
,
99 .vop_putpages
= vop_stdputpages
,
100 .vop_read
= hammer_vop_read
,
101 .vop_write
= hammer_vop_write
,
102 .vop_access
= hammer_vop_access
,
103 .vop_advlock
= hammer_vop_advlock
,
104 .vop_close
= hammer_vop_close
,
105 .vop_ncreate
= hammer_vop_ncreate
,
106 .vop_getattr
= hammer_vop_getattr
,
107 .vop_inactive
= hammer_vop_inactive
,
108 .vop_reclaim
= hammer_vop_reclaim
,
109 .vop_nresolve
= hammer_vop_nresolve
,
110 .vop_nlookupdotdot
= hammer_vop_nlookupdotdot
,
111 .vop_nlink
= hammer_vop_nlink
,
112 .vop_nmkdir
= hammer_vop_nmkdir
,
113 .vop_nmknod
= hammer_vop_nmknod
,
114 .vop_open
= hammer_vop_open
,
115 .vop_pathconf
= vop_stdpathconf
,
116 .vop_print
= hammer_vop_print
,
117 .vop_readdir
= hammer_vop_readdir
,
118 .vop_readlink
= hammer_vop_readlink
,
119 .vop_nremove
= hammer_vop_nremove
,
120 .vop_nrename
= hammer_vop_nrename
,
121 .vop_nrmdir
= hammer_vop_nrmdir
,
122 .vop_markatime
= hammer_vop_markatime
,
123 .vop_setattr
= hammer_vop_setattr
,
124 .vop_bmap
= hammer_vop_bmap
,
125 .vop_strategy
= hammer_vop_strategy
,
126 .vop_nsymlink
= hammer_vop_nsymlink
,
127 .vop_nwhiteout
= hammer_vop_nwhiteout
,
128 .vop_ioctl
= hammer_vop_ioctl
,
129 .vop_mountctl
= hammer_vop_mountctl
,
130 .vop_kqfilter
= hammer_vop_kqfilter
/*
 * Operations vector whose default handler is spec_vnoperate; by its name
 * it is presumably installed on device-special vnodes -- confirm.
 *
 * read, write and close go through the hammer_vop_spec*() wrappers, while
 * fsync, access, markatime, getattr, inactive, reclaim and setattr reuse
 * the same handlers as hammer_vnode_vops.
 *
 * NOTE(review): the closing "};" of this initializer is not visible in
 * this listing (the embedded numbering skips 145-146).
 */
133 struct vop_ops hammer_spec_vops
= {
134 .vop_default
= spec_vnoperate
,
135 .vop_fsync
= hammer_vop_fsync
,
136 .vop_read
= hammer_vop_specread
,
137 .vop_write
= hammer_vop_specwrite
,
138 .vop_access
= hammer_vop_access
,
139 .vop_close
= hammer_vop_specclose
,
140 .vop_markatime
= hammer_vop_markatime
,
141 .vop_getattr
= hammer_vop_getattr
,
142 .vop_inactive
= hammer_vop_inactive
,
143 .vop_reclaim
= hammer_vop_reclaim
,
144 .vop_setattr
= hammer_vop_setattr
/*
 * Operations vector whose default handler is fifo_vnoperate; by its name
 * it is presumably installed on FIFO vnodes -- confirm.
 *
 * read, write, close and kqfilter go through the hammer_vop_fifo*()
 * wrappers, while fsync, access, markatime, getattr, inactive, reclaim
 * and setattr reuse the same handlers as hammer_vnode_vops.
 *
 * NOTE(review): the closing "};" of this initializer is not visible in
 * this listing (the embedded numbering skips 160-163).
 */
147 struct vop_ops hammer_fifo_vops
= {
148 .vop_default
= fifo_vnoperate
,
149 .vop_fsync
= hammer_vop_fsync
,
150 .vop_read
= hammer_vop_fiforead
,
151 .vop_write
= hammer_vop_fifowrite
,
152 .vop_access
= hammer_vop_access
,
153 .vop_close
= hammer_vop_fifoclose
,
154 .vop_markatime
= hammer_vop_markatime
,
155 .vop_getattr
= hammer_vop_getattr
,
156 .vop_inactive
= hammer_vop_inactive
,
157 .vop_reclaim
= hammer_vop_reclaim
,
158 .vop_setattr
= hammer_vop_setattr
,
159 .vop_kqfilter
= hammer_vop_fifokqfilter
/*
 * Post an event on a vnode: passes 'flags' to KNOTE() for the si_note
 * list found under the vnode's v_pollinfo.vpi_selinfo.  Callers in this
 * file pass NOTE_* values such as NOTE_WRITE and NOTE_LINK.
 *
 * NOTE(review): the embedded numbering skips 165-166, so any guard around
 * the KNOTE() call is not visible in this listing.
 */
164 hammer_knote(struct vnode
*vp
, int flags
)
167 KNOTE(&vp
->v_pollinfo
.vpi_selinfo
.si_note
, flags
);
170 #ifdef DEBUG_TRUNCATE
171 struct hammer_inode
*HammerTruncIp
;
/*
 * Forward declarations of file-local helpers whose definitions are not in
 * this part of the file.
 *
 * hammer_dounlink() takes a transaction, a namecache handle, the
 * directory vnode, credentials, flags and an 'isdir' selector; presumably
 * it is the shared back end of the remove/rmdir handlers -- confirm.
 * The strategy_read/strategy_write pair takes the same argument structure
 * as hammer_vop_strategy.
 */
174 static int hammer_dounlink(hammer_transaction_t trans
, struct nchandle
*nch
,
175 struct vnode
*dvp
, struct ucred
*cred
,
176 int flags
, int isdir
);
177 static int hammer_vop_strategy_read(struct vop_strategy_args
*ap
);
178 static int hammer_vop_strategy_write(struct vop_strategy_args
*ap
);
/*
 * Generic dispatcher: forwards the request to the hammer_vnode_vops table
 * through VOCALL().
 *
 * NOTE(review): the parameter is unnamed in the declarator, yet the body
 * uses 'ap', and the matching prototype near the top of the file is
 * commented out.  This is presumably disabled code (the surrounding
 * preprocessor lines are not visible in this listing) -- confirm before
 * enabling it.
 */
183 hammer_vop_vnoperate(struct vop_generic_args
*)
185 return (VOCALL(&hammer_vnode_vops
, ap
));
190 * hammer_vop_fsync { vp, waitfor }
192 * fsync() an inode to disk and wait for it to be completely committed
193 * such that the information would not be undone if a crash occurred after
/*
 * fsync handler.  ap->a_vp is the vnode to sync and ap->a_waitfor selects
 * whether the caller wants to wait (MNT_WAIT) for completion.
 */
198 hammer_vop_fsync(struct vop_fsync_args
*ap
)
200 hammer_inode_t ip
= VTOI(ap
->a_vp
);
/* Account for the call in the global fsync counter. */
202 ++hammer_count_fsyncs
;
/* Sync the vnode through vfsync(), then ask for a flush of the inode. */
203 vfsync(ap
->a_vp
, ap
->a_waitfor
, 1, NULL
, NULL
);
204 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
/*
 * Synchronous request: wait on the inode, then take the vnode lock again
 * exclusively.
 *
 * NOTE(review): the embedded numbering skips 206, so the statement that
 * precedes the wait (presumably the matching unlock of the vnode) is not
 * visible in this listing; the function's return is not visible either.
 */
205 if (ap
->a_waitfor
== MNT_WAIT
) {
207 hammer_wait_inode(ip
);
208 vn_lock(ap
->a_vp
, LK_EXCLUSIVE
| LK_RETRY
);
214 * hammer_vop_read { vp, uio, ioflag, cred }
218 hammer_vop_read(struct vop_read_args
*ap
)
220 struct hammer_transaction trans
;
231 if (ap
->a_vp
->v_type
!= VREG
)
238 * Allow the UIO's size to override the sequential heuristic.
240 blksize
= hammer_blocksize(uio
->uio_offset
);
241 seqcount
= (uio
->uio_resid
+ (blksize
- 1)) / blksize
;
242 ioseqcount
= ap
->a_ioflag
>> 16;
243 if (seqcount
< ioseqcount
)
244 seqcount
= ioseqcount
;
246 hammer_start_transaction(&trans
, ip
->hmp
);
249 * Access the data typically in HAMMER_BUFSIZE blocks via the
250 * buffer cache, but HAMMER may use a variable block size based
253 while (uio
->uio_resid
> 0 && uio
->uio_offset
< ip
->ino_data
.size
) {
257 blksize
= hammer_blocksize(uio
->uio_offset
);
258 offset
= (int)uio
->uio_offset
& (blksize
- 1);
259 base_offset
= uio
->uio_offset
- offset
;
261 if (hammer_cluster_enable
) {
263 * Use file_limit to prevent cluster_read() from
264 * creating buffers of the wrong block size past
267 file_limit
= ip
->ino_data
.size
;
268 if (base_offset
< HAMMER_XDEMARC
&&
269 file_limit
> HAMMER_XDEMARC
) {
270 file_limit
= HAMMER_XDEMARC
;
272 error
= cluster_read(ap
->a_vp
,
273 file_limit
, base_offset
,
277 error
= bread(ap
->a_vp
, base_offset
, blksize
, &bp
);
280 kprintf("error %d\n", error
);
285 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
286 n
= blksize
- offset
;
287 if (n
> uio
->uio_resid
)
289 if (n
> ip
->ino_data
.size
- uio
->uio_offset
)
290 n
= (int)(ip
->ino_data
.size
- uio
->uio_offset
);
291 error
= uiomove((char *)bp
->b_data
+ offset
, n
, uio
);
293 /* data has a lower priority than meta-data */
294 bp
->b_flags
|= B_AGE
;
298 hammer_stats_file_read
+= n
;
300 if ((ip
->flags
& HAMMER_INODE_RO
) == 0 &&
301 (ip
->hmp
->mp
->mnt_flag
& MNT_NOATIME
) == 0) {
302 ip
->ino_data
.atime
= trans
.time
;
303 hammer_modify_inode(ip
, HAMMER_INODE_ATIME
);
305 hammer_done_transaction(&trans
);
310 * hammer_vop_write { vp, uio, ioflag, cred }
314 hammer_vop_write(struct vop_write_args
*ap
)
316 struct hammer_transaction trans
;
317 struct hammer_inode
*ip
;
330 if (ap
->a_vp
->v_type
!= VREG
)
336 seqcount
= ap
->a_ioflag
>> 16;
338 if (ip
->flags
& HAMMER_INODE_RO
)
342 * Create a transaction to cover the operations we perform.
344 hammer_start_transaction(&trans
, hmp
);
350 if (ap
->a_ioflag
& IO_APPEND
)
351 uio
->uio_offset
= ip
->ino_data
.size
;
354 * Check for illegal write offsets. Valid range is 0...2^63-1.
356 * NOTE: the base_off assignment is required to work around what
357 * I consider to be a GCC-4 optimization bug.
359 if (uio
->uio_offset
< 0) {
360 hammer_done_transaction(&trans
);
363 base_offset
= uio
->uio_offset
+ uio
->uio_resid
; /* work around gcc-4 */
364 if (uio
->uio_resid
> 0 && base_offset
<= 0) {
365 hammer_done_transaction(&trans
);
370 * Access the data typically in HAMMER_BUFSIZE blocks via the
371 * buffer cache, but HAMMER may use a variable block size based
374 while (uio
->uio_resid
> 0) {
379 if ((error
= hammer_checkspace(hmp
, HAMMER_CHKSPC_WRITE
)) != 0)
382 blksize
= hammer_blocksize(uio
->uio_offset
);
385 * Do not allow HAMMER to blow out the buffer cache. Very
386 * large UIOs can lockout other processes due to bwillwrite()
389 * The hammer inode is not locked during these operations.
390 * The vnode is locked which can interfere with the pageout
391 * daemon for non-UIO_NOCOPY writes but should not interfere
392 * with the buffer cache. Even so, we cannot afford to
393 * allow the pageout daemon to build up too many dirty buffer
396 * Only call this if we aren't being recursively called from
397 * a virtual disk device (vn), else we may deadlock.
399 if ((ap
->a_ioflag
& IO_RECURSE
) == 0)
403 * Do not allow HAMMER to blow out system memory by
404 * accumulating too many records. Records are so well
405 * decoupled from the buffer cache that it is possible
406 * for userland to push data out to the media via
407 * direct-write, but build up the records queued to the
408 * backend faster than the backend can flush them out.
409 * HAMMER has hit its write limit but the frontend has
410 * no pushback to slow it down.
412 if (hmp
->rsv_recs
> hammer_limit_recs
/ 2) {
414 * Get the inode on the flush list
416 if (ip
->rsv_recs
>= 64)
417 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
418 else if (ip
->rsv_recs
>= 16)
419 hammer_flush_inode(ip
, 0);
422 * Keep the flusher going if the system keeps
425 delta
= hmp
->count_newrecords
-
426 hmp
->last_newrecords
;
427 if (delta
< 0 || delta
> hammer_limit_recs
/ 2) {
428 hmp
->last_newrecords
= hmp
->count_newrecords
;
429 hammer_sync_hmp(hmp
, MNT_NOWAIT
);
433 * If we have gotten behind start slowing
436 delta
= (hmp
->rsv_recs
- hammer_limit_recs
) *
437 hz
/ hammer_limit_recs
;
439 tsleep(&trans
, 0, "hmrslo", delta
);
443 * Calculate the blocksize at the current offset and figure
444 * out how much we can actually write.
446 blkmask
= blksize
- 1;
447 offset
= (int)uio
->uio_offset
& blkmask
;
448 base_offset
= uio
->uio_offset
& ~(int64_t)blkmask
;
449 n
= blksize
- offset
;
450 if (n
> uio
->uio_resid
)
452 if (uio
->uio_offset
+ n
> ip
->ino_data
.size
) {
453 vnode_pager_setsize(ap
->a_vp
, uio
->uio_offset
+ n
);
455 kflags
|= NOTE_EXTEND
;
458 if (uio
->uio_segflg
== UIO_NOCOPY
) {
460 * Issuing a write with the same data backing the
461 * buffer. Instantiate the buffer to collect the
462 * backing vm pages, then read-in any missing bits.
464 * This case is used by vop_stdputpages().
466 bp
= getblk(ap
->a_vp
, base_offset
,
467 blksize
, GETBLK_BHEAVY
, 0);
468 if ((bp
->b_flags
& B_CACHE
) == 0) {
470 error
= bread(ap
->a_vp
, base_offset
,
473 } else if (offset
== 0 && uio
->uio_resid
>= blksize
) {
475 * Even though we are entirely overwriting the buffer
476 * we may still have to zero it out to avoid a
477 * mmap/write visibility issue.
479 bp
= getblk(ap
->a_vp
, base_offset
, blksize
, GETBLK_BHEAVY
, 0);
480 if ((bp
->b_flags
& B_CACHE
) == 0)
482 } else if (base_offset
>= ip
->ino_data
.size
) {
484 * If the base offset of the buffer is beyond the
485 * file EOF, we don't have to issue a read.
487 bp
= getblk(ap
->a_vp
, base_offset
,
488 blksize
, GETBLK_BHEAVY
, 0);
492 * Partial overwrite, read in any missing bits then
493 * replace the portion being written.
495 error
= bread(ap
->a_vp
, base_offset
, blksize
, &bp
);
500 error
= uiomove((char *)bp
->b_data
+ offset
,
505 * If we screwed up we have to undo any VM size changes we
511 vtruncbuf(ap
->a_vp
, ip
->ino_data
.size
,
512 hammer_blocksize(ip
->ino_data
.size
));
516 kflags
|= NOTE_WRITE
;
517 hammer_stats_file_write
+= n
;
518 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
519 if (ip
->ino_data
.size
< uio
->uio_offset
) {
520 ip
->ino_data
.size
= uio
->uio_offset
;
521 flags
= HAMMER_INODE_DDIRTY
;
522 vnode_pager_setsize(ap
->a_vp
, ip
->ino_data
.size
);
526 ip
->ino_data
.mtime
= trans
.time
;
527 flags
|= HAMMER_INODE_MTIME
| HAMMER_INODE_BUFS
;
528 hammer_modify_inode(ip
, flags
);
531 * Once we dirty the buffer any cached zone-X offset
532 * becomes invalid. HAMMER NOTE: no-history mode cannot
533 * allow overwriting over the same data sector unless
534 * we provide UNDOs for the old data, which we don't.
536 bp
->b_bio2
.bio_offset
= NOOFFSET
;
539 * Final buffer disposition.
541 bp
->b_flags
|= B_AGE
;
542 if (ap
->a_ioflag
& IO_SYNC
) {
544 } else if (ap
->a_ioflag
& IO_DIRECT
) {
550 hammer_done_transaction(&trans
);
551 hammer_knote(ap
->a_vp
, kflags
);
556 * hammer_vop_access { vp, mode, cred }
/*
 * Access-check handler.  The decision itself is made by
 * vop_helper_access(); this function only gathers the inode's owner,
 * group, mode and user flags for it.
 */
560 hammer_vop_access(struct vop_access_args
*ap
)
562 struct hammer_inode
*ip
= VTOI(ap
->a_vp
);
/* Counted as a read-side file operation. */
567 ++hammer_stats_file_iopsr
;
/* Convert the owner and group stored in the inode via hammer_to_unix_xid(). */
568 uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
569 gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
/*
 * The helper's result is stored in 'error'; the return statement is not
 * visible in this listing.
 */
571 error
= vop_helper_access(ap
, uid
, gid
, ip
->ino_data
.mode
,
572 ip
->ino_data
.uflags
);
577 * hammer_vop_advlock { vp, id, op, fl, flags }
/*
 * Advisory-lock handler: delegates to lf_advlock(), handing it the
 * per-inode 'advlock' state and the inode's current size, and returns
 * whatever lf_advlock() returns.
 */
581 hammer_vop_advlock(struct vop_advlock_args
*ap
)
583 hammer_inode_t ip
= VTOI(ap
->a_vp
);
585 return (lf_advlock(ap
, &ip
->advlock
, ip
->ino_data
.size
));
589 * hammer_vop_close { vp, fflag }
/*
 * Close handler: no HAMMER-specific work is done here; the request is
 * passed straight to vop_stdclose() and its result is returned.  The
 * inode lookup is left commented out because it is unused.
 */
593 hammer_vop_close(struct vop_close_args
*ap
)
595 /*hammer_inode_t ip = VTOI(ap->a_vp);*/
596 return (vop_stdclose(ap
));
600 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
602 * The operating system has already ensured that the directory entry
603 * does not exist and done all appropriate namespace locking.
607 hammer_vop_ncreate(struct vop_ncreate_args
*ap
)
609 struct hammer_transaction trans
;
610 struct hammer_inode
*dip
;
611 struct hammer_inode
*nip
;
612 struct nchandle
*nch
;
616 dip
= VTOI(ap
->a_dvp
);
618 if (dip
->flags
& HAMMER_INODE_RO
)
620 if ((error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0)
624 * Create a transaction to cover the operations we perform.
626 hammer_start_transaction(&trans
, dip
->hmp
);
627 ++hammer_stats_file_iopsw
;
630 * Create a new filesystem object of the requested type. The
631 * returned inode will be referenced and shared-locked to prevent
632 * it from being moved to the flusher.
635 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
,
638 hkprintf("hammer_create_inode error %d\n", error
);
639 hammer_done_transaction(&trans
);
645 * Add the new filesystem object to the directory. This will also
646 * bump the inode's link count.
648 error
= hammer_ip_add_directory(&trans
, dip
,
649 nch
->ncp
->nc_name
, nch
->ncp
->nc_nlen
,
652 hkprintf("hammer_ip_add_directory error %d\n", error
);
658 hammer_rel_inode(nip
, 0);
659 hammer_done_transaction(&trans
);
662 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
663 hammer_done_transaction(&trans
);
664 hammer_rel_inode(nip
, 0);
666 cache_setunresolved(ap
->a_nch
);
667 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
669 hammer_knote(ap
->a_dvp
, NOTE_WRITE
);
675 * hammer_vop_getattr { vp, vap }
677 * Retrieve an inode's attribute information. When accessing inodes
678 * historically we fake the atime field to ensure consistent results.
679 * The atime field is stored in the B-Tree element and allowed to be
680 * updated without cycling the element.
684 hammer_vop_getattr(struct vop_getattr_args
*ap
)
686 struct hammer_inode
*ip
= VTOI(ap
->a_vp
);
687 struct vattr
*vap
= ap
->a_vap
;
690 * We want the fsid to be different when accessing a filesystem
691 * with different as-of's so programs like diff don't think
692 * the files are the same.
694 * We also want the fsid to be the same when comparing snapshots,
695 * or when comparing mirrors (which might be backed by different
696 * physical devices). HAMMER fsids are based on the PFS's
699 * XXX there is a chance of collision here. The va_fsid reported
700 * by stat is different from the more involved fsid used in the
703 ++hammer_stats_file_iopsr
;
704 vap
->va_fsid
= ip
->pfsm
->fsid_udev
^ (u_int32_t
)ip
->obj_asof
^
705 (u_int32_t
)(ip
->obj_asof
>> 32);
707 vap
->va_fileid
= ip
->ino_leaf
.base
.obj_id
;
708 vap
->va_mode
= ip
->ino_data
.mode
;
709 vap
->va_nlink
= ip
->ino_data
.nlinks
;
710 vap
->va_uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
711 vap
->va_gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
714 vap
->va_size
= ip
->ino_data
.size
;
717 * Special case for @@PFS softlinks. The actual size of the
718 * expanded softlink is "@@0x%016llx:%05d" == 26 bytes.
719 * or for MAX_TID is "@@-1:%05d" == 10 bytes.
721 if (ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_SOFTLINK
&&
722 ip
->ino_data
.size
== 10 &&
723 ip
->obj_asof
== HAMMER_MAX_TID
&&
724 ip
->obj_localization
== 0 &&
725 strncmp(ip
->ino_data
.ext
.symlink
, "@@PFS", 5) == 0) {
726 if (ip
->pfsm
->pfsd
.mirror_flags
& HAMMER_PFSD_SLAVE
)
733 * We must provide a consistent atime and mtime for snapshots
734 * so people can do a 'tar cf - ... | md5' on them and get
735 * consistent results.
737 if (ip
->flags
& HAMMER_INODE_RO
) {
738 hammer_time_to_timespec(ip
->ino_data
.ctime
, &vap
->va_atime
);
739 hammer_time_to_timespec(ip
->ino_data
.ctime
, &vap
->va_mtime
);
741 hammer_time_to_timespec(ip
->ino_data
.atime
, &vap
->va_atime
);
742 hammer_time_to_timespec(ip
->ino_data
.mtime
, &vap
->va_mtime
);
744 hammer_time_to_timespec(ip
->ino_data
.ctime
, &vap
->va_ctime
);
745 vap
->va_flags
= ip
->ino_data
.uflags
;
746 vap
->va_gen
= 1; /* hammer inums are unique for all time */
747 vap
->va_blocksize
= HAMMER_BUFSIZE
;
748 if (ip
->ino_data
.size
>= HAMMER_XDEMARC
) {
749 vap
->va_bytes
= (ip
->ino_data
.size
+ HAMMER_XBUFMASK64
) &
751 } else if (ip
->ino_data
.size
> HAMMER_BUFSIZE
/ 2) {
752 vap
->va_bytes
= (ip
->ino_data
.size
+ HAMMER_BUFMASK64
) &
755 vap
->va_bytes
= (ip
->ino_data
.size
+ 15) & ~15;
758 vap
->va_type
= hammer_get_vnode_type(ip
->ino_data
.obj_type
);
759 vap
->va_filerev
= 0; /* XXX */
760 /* mtime uniquely identifies any adjustments made to the file XXX */
761 vap
->va_fsmid
= ip
->ino_data
.mtime
;
762 vap
->va_uid_uuid
= ip
->ino_data
.uid
;
763 vap
->va_gid_uuid
= ip
->ino_data
.gid
;
764 vap
->va_fsid_uuid
= ip
->hmp
->fsid
;
765 vap
->va_vaflags
= VA_UID_UUID_VALID
| VA_GID_UUID_VALID
|
768 switch (ip
->ino_data
.obj_type
) {
769 case HAMMER_OBJTYPE_CDEV
:
770 case HAMMER_OBJTYPE_BDEV
:
771 vap
->va_rmajor
= ip
->ino_data
.rmajor
;
772 vap
->va_rminor
= ip
->ino_data
.rminor
;
781 * hammer_vop_nresolve { nch, dvp, cred }
783 * Locate the requested directory entry.
787 hammer_vop_nresolve(struct vop_nresolve_args
*ap
)
789 struct hammer_transaction trans
;
790 struct namecache
*ncp
;
794 struct hammer_cursor cursor
;
803 u_int32_t localization
;
804 u_int32_t max_iterations
;
807 * Misc initialization, plus handle as-of name extensions. Look for
808 * the '@@' extension. Note that as-of files and directories cannot
811 dip
= VTOI(ap
->a_dvp
);
812 ncp
= ap
->a_nch
->ncp
;
813 asof
= dip
->obj_asof
;
814 localization
= dip
->obj_localization
; /* for code consistency */
816 flags
= dip
->flags
& HAMMER_INODE_RO
;
819 hammer_simple_transaction(&trans
, dip
->hmp
);
820 ++hammer_stats_file_iopsr
;
822 for (i
= 0; i
< nlen
; ++i
) {
823 if (ncp
->nc_name
[i
] == '@' && ncp
->nc_name
[i
+1] == '@') {
824 error
= hammer_str_to_tid(ncp
->nc_name
+ i
+ 2,
825 &ispfs
, &asof
, &localization
);
830 if (asof
!= HAMMER_MAX_TID
)
831 flags
|= HAMMER_INODE_RO
;
838 * If this is a PFS softlink we dive into the PFS
840 if (ispfs
&& nlen
== 0) {
841 ip
= hammer_get_inode(&trans
, dip
, HAMMER_OBJID_ROOT
,
845 error
= hammer_get_vnode(ip
, &vp
);
846 hammer_rel_inode(ip
, 0);
852 cache_setvp(ap
->a_nch
, vp
);
859 * If there is no path component the time extension is relative to dip.
860 * e.g. "fubar/@@<snapshot>"
862 * "." is handled by the kernel, but ".@@<snapshot>" is not.
863 * e.g. "fubar/.@@<snapshot>"
865 * ".." is handled by the kernel. We do not currently handle
868 if (nlen
== 0 || (nlen
== 1 && ncp
->nc_name
[0] == '.')) {
869 ip
= hammer_get_inode(&trans
, dip
, dip
->obj_id
,
870 asof
, dip
->obj_localization
,
873 error
= hammer_get_vnode(ip
, &vp
);
874 hammer_rel_inode(ip
, 0);
880 cache_setvp(ap
->a_nch
, vp
);
887 * Calculate the namekey and setup the key range for the scan. This
888 * works kinda like a chained hash table where the lower 32 bits
889 * of the namekey synthesize the chain.
891 * The key range is inclusive of both key_beg and key_end.
893 namekey
= hammer_directory_namekey(dip
, ncp
->nc_name
, nlen
,
896 error
= hammer_init_cursor(&trans
, &cursor
, &dip
->cache
[1], dip
);
897 cursor
.key_beg
.localization
= dip
->obj_localization
+
898 HAMMER_LOCALIZE_MISC
;
899 cursor
.key_beg
.obj_id
= dip
->obj_id
;
900 cursor
.key_beg
.key
= namekey
;
901 cursor
.key_beg
.create_tid
= 0;
902 cursor
.key_beg
.delete_tid
= 0;
903 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
904 cursor
.key_beg
.obj_type
= 0;
906 cursor
.key_end
= cursor
.key_beg
;
907 cursor
.key_end
.key
+= max_iterations
;
909 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
912 * Scan all matching records (the chain), locate the one matching
913 * the requested path component.
915 * The hammer_ip_*() functions merge in-memory records with on-disk
916 * records for the purposes of the search.
919 localization
= HAMMER_DEF_LOCALIZATION
;
922 error
= hammer_ip_first(&cursor
);
924 error
= hammer_ip_resolve_data(&cursor
);
927 if (nlen
== cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
&&
928 bcmp(ncp
->nc_name
, cursor
.data
->entry
.name
, nlen
) == 0) {
929 obj_id
= cursor
.data
->entry
.obj_id
;
930 localization
= cursor
.data
->entry
.localization
;
933 error
= hammer_ip_next(&cursor
);
936 hammer_done_cursor(&cursor
);
939 * Lookup the obj_id. This should always succeed. If it does not
940 * the filesystem may be damaged and we return a dummy inode.
943 ip
= hammer_get_inode(&trans
, dip
, obj_id
,
946 if (error
== ENOENT
) {
947 kprintf("HAMMER: WARNING: Missing "
948 "inode for dirent \"%s\"\n"
949 "\tobj_id = %016llx\n",
950 ncp
->nc_name
, (long long)obj_id
);
952 ip
= hammer_get_dummy_inode(&trans
, dip
, obj_id
,
957 error
= hammer_get_vnode(ip
, &vp
);
958 hammer_rel_inode(ip
, 0);
964 cache_setvp(ap
->a_nch
, vp
);
967 } else if (error
== ENOENT
) {
968 cache_setvp(ap
->a_nch
, NULL
);
971 hammer_done_transaction(&trans
);
976 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
978 * Locate the parent directory of a directory vnode.
980 * dvp is referenced but not locked. *vpp must be returned referenced and
981 * locked. A parent_obj_id of 0 does not necessarily indicate that we are
982 * at the root, instead it could indicate that the directory we were in was
985 * NOTE: as-of sequences are not linked into the directory structure. If
986 * we are at the root with a different asof than the mount point, reload
987 * the same directory with the mount point's asof. I'm not sure what this
988 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not
989 * get confused, but it hasn't been tested.
993 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args
*ap
)
995 struct hammer_transaction trans
;
996 struct hammer_inode
*dip
;
997 struct hammer_inode
*ip
;
998 int64_t parent_obj_id
;
999 u_int32_t parent_obj_localization
;
1003 dip
= VTOI(ap
->a_dvp
);
1004 asof
= dip
->obj_asof
;
1007 * Who is our parent? This could be the root of a pseudo-filesystem
1008 * whose parent is in another localization domain.
1010 parent_obj_id
= dip
->ino_data
.parent_obj_id
;
1011 if (dip
->obj_id
== HAMMER_OBJID_ROOT
)
1012 parent_obj_localization
= dip
->ino_data
.ext
.obj
.parent_obj_localization
;
1014 parent_obj_localization
= dip
->obj_localization
;
1016 if (parent_obj_id
== 0) {
1017 if (dip
->obj_id
== HAMMER_OBJID_ROOT
&&
1018 asof
!= dip
->hmp
->asof
) {
1019 parent_obj_id
= dip
->obj_id
;
1020 asof
= dip
->hmp
->asof
;
1021 *ap
->a_fakename
= kmalloc(19, M_TEMP
, M_WAITOK
);
1022 ksnprintf(*ap
->a_fakename
, 19, "0x%016llx",
1030 hammer_simple_transaction(&trans
, dip
->hmp
);
1031 ++hammer_stats_file_iopsr
;
1033 ip
= hammer_get_inode(&trans
, dip
, parent_obj_id
,
1034 asof
, parent_obj_localization
,
1035 dip
->flags
, &error
);
1037 error
= hammer_get_vnode(ip
, ap
->a_vpp
);
1038 hammer_rel_inode(ip
, 0);
1042 hammer_done_transaction(&trans
);
1047 * hammer_vop_nlink { nch, dvp, vp, cred }
1051 hammer_vop_nlink(struct vop_nlink_args
*ap
)
1053 struct hammer_transaction trans
;
1054 struct hammer_inode
*dip
;
1055 struct hammer_inode
*ip
;
1056 struct nchandle
*nch
;
1059 if (ap
->a_dvp
->v_mount
!= ap
->a_vp
->v_mount
)
1063 dip
= VTOI(ap
->a_dvp
);
1064 ip
= VTOI(ap
->a_vp
);
1066 if (dip
->obj_localization
!= ip
->obj_localization
)
1069 if (dip
->flags
& HAMMER_INODE_RO
)
1071 if (ip
->flags
& HAMMER_INODE_RO
)
1073 if ((error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0)
1077 * Create a transaction to cover the operations we perform.
1079 hammer_start_transaction(&trans
, dip
->hmp
);
1080 ++hammer_stats_file_iopsw
;
1083 * Add the filesystem object to the directory. Note that neither
1084 * dip nor ip are referenced or locked, but their vnodes are
1085 * referenced. This function will bump the inode's link count.
1087 error
= hammer_ip_add_directory(&trans
, dip
,
1088 nch
->ncp
->nc_name
, nch
->ncp
->nc_nlen
,
1095 cache_setunresolved(nch
);
1096 cache_setvp(nch
, ap
->a_vp
);
1098 hammer_done_transaction(&trans
);
1099 hammer_knote(ap
->a_vp
, NOTE_LINK
);
1100 hammer_knote(ap
->a_dvp
, NOTE_WRITE
);
1105 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
1107 * The operating system has already ensured that the directory entry
1108 * does not exist and done all appropriate namespace locking.
1112 hammer_vop_nmkdir(struct vop_nmkdir_args
*ap
)
1114 struct hammer_transaction trans
;
1115 struct hammer_inode
*dip
;
1116 struct hammer_inode
*nip
;
1117 struct nchandle
*nch
;
1121 dip
= VTOI(ap
->a_dvp
);
1123 if (dip
->flags
& HAMMER_INODE_RO
)
1125 if ((error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0)
1129 * Create a transaction to cover the operations we perform.
1131 hammer_start_transaction(&trans
, dip
->hmp
);
1132 ++hammer_stats_file_iopsw
;
1135 * Create a new filesystem object of the requested type. The
1136 * returned inode will be referenced but not locked.
1138 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
,
1141 hkprintf("hammer_mkdir error %d\n", error
);
1142 hammer_done_transaction(&trans
);
1147 * Add the new filesystem object to the directory. This will also
1148 * bump the inode's link count.
1150 error
= hammer_ip_add_directory(&trans
, dip
,
1151 nch
->ncp
->nc_name
, nch
->ncp
->nc_nlen
,
1154 hkprintf("hammer_mkdir (add) error %d\n", error
);
1160 hammer_rel_inode(nip
, 0);
1163 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
1164 hammer_rel_inode(nip
, 0);
1166 cache_setunresolved(ap
->a_nch
);
1167 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
1170 hammer_done_transaction(&trans
);
1172 hammer_knote(ap
->a_dvp
, NOTE_WRITE
| NOTE_LINK
);
1177 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
1179 * The operating system has already ensured that the directory entry
1180 * does not exist and done all appropriate namespace locking.
1184 hammer_vop_nmknod(struct vop_nmknod_args
*ap
)
1186 struct hammer_transaction trans
;
1187 struct hammer_inode
*dip
;
1188 struct hammer_inode
*nip
;
1189 struct nchandle
*nch
;
1193 dip
= VTOI(ap
->a_dvp
);
1195 if (dip
->flags
& HAMMER_INODE_RO
)
1197 if ((error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0)
1201 * Create a transaction to cover the operations we perform.
1203 hammer_start_transaction(&trans
, dip
->hmp
);
1204 ++hammer_stats_file_iopsw
;
1207 * Create a new filesystem object of the requested type. The
1208 * returned inode will be referenced but not locked.
1210 * If mknod specifies a directory a pseudo-fs is created.
1212 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
,
1215 hammer_done_transaction(&trans
);
1221 * Add the new filesystem object to the directory. This will also
1222 * bump the inode's link count.
1224 error
= hammer_ip_add_directory(&trans
, dip
,
1225 nch
->ncp
->nc_name
, nch
->ncp
->nc_nlen
,
1232 hammer_rel_inode(nip
, 0);
1235 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
1236 hammer_rel_inode(nip
, 0);
1238 cache_setunresolved(ap
->a_nch
);
1239 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
1242 hammer_done_transaction(&trans
);
1244 hammer_knote(ap
->a_dvp
, NOTE_WRITE
);
1249 * hammer_vop_open { vp, mode, cred, fp }
/*
 * Open handler.  Bumps the read-side iops counter, tests for a write-mode
 * open (FWRITE) of an inode flagged HAMMER_INODE_RO, and hands the
 * request to vop_stdopen().
 *
 * NOTE(review): the embedded numbering skips from 1260 to 1262, so the
 * statement guarded by the read-only test is missing from this listing;
 * the vop_stdopen() call below is not necessarily its body.  Presumably
 * the missing line rejects the open -- confirm against the full file.
 */
1253 hammer_vop_open(struct vop_open_args
*ap
)
1257 ++hammer_stats_file_iopsr
;
1258 ip
= VTOI(ap
->a_vp
);
1260 if ((ap
->a_mode
& FWRITE
) && (ip
->flags
& HAMMER_INODE_RO
))
1262 return(vop_stdopen(ap
));
1266 * hammer_vop_print { vp }
1270 hammer_vop_print(struct vop_print_args
*ap
)
1276 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
1280 hammer_vop_readdir(struct vop_readdir_args
*ap
)
1282 struct hammer_transaction trans
;
1283 struct hammer_cursor cursor
;
1284 struct hammer_inode
*ip
;
1286 hammer_base_elm_t base
;
1295 ++hammer_stats_file_iopsr
;
1296 ip
= VTOI(ap
->a_vp
);
1298 saveoff
= uio
->uio_offset
;
1300 if (ap
->a_ncookies
) {
1301 ncookies
= uio
->uio_resid
/ 16 + 1;
1302 if (ncookies
> 1024)
1304 cookies
= kmalloc(ncookies
* sizeof(off_t
), M_TEMP
, M_WAITOK
);
1312 hammer_simple_transaction(&trans
, ip
->hmp
);
1315 * Handle artificial entries
1317 * It should be noted that the minimum value for a directory
1318 * hash key on-media is 0x0000000100000000, so we can use anything
1319 * less than that to represent our 'special' key space.
1323 r
= vop_write_dirent(&error
, uio
, ip
->obj_id
, DT_DIR
, 1, ".");
1327 cookies
[cookie_index
] = saveoff
;
1330 if (cookie_index
== ncookies
)
1334 if (ip
->ino_data
.parent_obj_id
) {
1335 r
= vop_write_dirent(&error
, uio
,
1336 ip
->ino_data
.parent_obj_id
,
1339 r
= vop_write_dirent(&error
, uio
,
1340 ip
->obj_id
, DT_DIR
, 2, "..");
1345 cookies
[cookie_index
] = saveoff
;
1348 if (cookie_index
== ncookies
)
1353 * Key range (begin and end inclusive) to scan. Directory keys
1354 * directly translate to a 64 bit 'seek' position.
1356 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[1], ip
);
1357 cursor
.key_beg
.localization
= ip
->obj_localization
+
1358 HAMMER_LOCALIZE_MISC
;
1359 cursor
.key_beg
.obj_id
= ip
->obj_id
;
1360 cursor
.key_beg
.create_tid
= 0;
1361 cursor
.key_beg
.delete_tid
= 0;
1362 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
1363 cursor
.key_beg
.obj_type
= 0;
1364 cursor
.key_beg
.key
= saveoff
;
1366 cursor
.key_end
= cursor
.key_beg
;
1367 cursor
.key_end
.key
= HAMMER_MAX_KEY
;
1368 cursor
.asof
= ip
->obj_asof
;
1369 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
1371 error
= hammer_ip_first(&cursor
);
1373 while (error
== 0) {
1374 error
= hammer_ip_resolve_data(&cursor
);
1377 base
= &cursor
.leaf
->base
;
1378 saveoff
= base
->key
;
1379 KKASSERT(cursor
.leaf
->data_len
> HAMMER_ENTRY_NAME_OFF
);
1381 if (base
->obj_id
!= ip
->obj_id
)
1382 panic("readdir: bad record at %p", cursor
.node
);
1385 * Convert pseudo-filesystems into softlinks
1387 dtype
= hammer_get_dtype(cursor
.leaf
->base
.obj_type
);
1388 r
= vop_write_dirent(
1389 &error
, uio
, cursor
.data
->entry
.obj_id
,
1391 cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
,
1392 (void *)cursor
.data
->entry
.name
);
1397 cookies
[cookie_index
] = base
->key
;
1399 if (cookie_index
== ncookies
)
1401 error
= hammer_ip_next(&cursor
);
1403 hammer_done_cursor(&cursor
);
1406 hammer_done_transaction(&trans
);
1409 *ap
->a_eofflag
= (error
== ENOENT
);
1410 uio
->uio_offset
= saveoff
;
1411 if (error
&& cookie_index
== 0) {
1412 if (error
== ENOENT
)
1415 kfree(cookies
, M_TEMP
);
1416 *ap
->a_ncookies
= 0;
1417 *ap
->a_cookies
= NULL
;
1420 if (error
== ENOENT
)
1423 *ap
->a_ncookies
= cookie_index
;
1424 *ap
->a_cookies
= cookies
;
1431 * hammer_vop_readlink { vp, uio, cred }
1435 hammer_vop_readlink(struct vop_readlink_args
*ap
)
1437 struct hammer_transaction trans
;
1438 struct hammer_cursor cursor
;
1439 struct hammer_inode
*ip
;
1441 u_int32_t localization
;
1442 hammer_pseudofs_inmem_t pfsm
;
1445 ip
= VTOI(ap
->a_vp
);
1448 * Shortcut if the symlink data was stuffed into ino_data.
1450 * Also expand special "@@PFS%05d" softlinks (expansion only
1451 * occurs for non-historical (current) accesses made from the
1452 * primary filesystem).
1454 if (ip
->ino_data
.size
<= HAMMER_INODE_BASESYMLEN
) {
1458 ptr
= ip
->ino_data
.ext
.symlink
;
1459 bytes
= (int)ip
->ino_data
.size
;
1461 ip
->obj_asof
== HAMMER_MAX_TID
&&
1462 ip
->obj_localization
== 0 &&
1463 strncmp(ptr
, "@@PFS", 5) == 0) {
1464 hammer_simple_transaction(&trans
, ip
->hmp
);
1465 bcopy(ptr
+ 5, buf
, 5);
1467 localization
= strtoul(buf
, NULL
, 10) << 16;
1468 pfsm
= hammer_load_pseudofs(&trans
, localization
,
1471 if (pfsm
->pfsd
.mirror_flags
&
1472 HAMMER_PFSD_SLAVE
) {
1473 /* vap->va_size == 26 */
1474 ksnprintf(buf
, sizeof(buf
),
1476 pfsm
->pfsd
.sync_end_tid
,
1477 localization
>> 16);
1479 /* vap->va_size == 10 */
1480 ksnprintf(buf
, sizeof(buf
),
1482 localization
>> 16);
1484 ksnprintf(buf
, sizeof(buf
),
1487 localization
>> 16);
1491 bytes
= strlen(buf
);
1494 hammer_rel_pseudofs(trans
.hmp
, pfsm
);
1495 hammer_done_transaction(&trans
);
1497 error
= uiomove(ptr
, bytes
, ap
->a_uio
);
1504 hammer_simple_transaction(&trans
, ip
->hmp
);
1505 ++hammer_stats_file_iopsr
;
1506 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[1], ip
);
1509 * Key range (begin and end inclusive) to scan. Directory keys
1510 * directly translate to a 64 bit 'seek' position.
1512 cursor
.key_beg
.localization
= ip
->obj_localization
+
1513 HAMMER_LOCALIZE_MISC
;
1514 cursor
.key_beg
.obj_id
= ip
->obj_id
;
1515 cursor
.key_beg
.create_tid
= 0;
1516 cursor
.key_beg
.delete_tid
= 0;
1517 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_FIX
;
1518 cursor
.key_beg
.obj_type
= 0;
1519 cursor
.key_beg
.key
= HAMMER_FIXKEY_SYMLINK
;
1520 cursor
.asof
= ip
->obj_asof
;
1521 cursor
.flags
|= HAMMER_CURSOR_ASOF
;
1523 error
= hammer_ip_lookup(&cursor
);
1525 error
= hammer_ip_resolve_data(&cursor
);
1527 KKASSERT(cursor
.leaf
->data_len
>=
1528 HAMMER_SYMLINK_NAME_OFF
);
1529 error
= uiomove(cursor
.data
->symlink
.name
,
1530 cursor
.leaf
->data_len
-
1531 HAMMER_SYMLINK_NAME_OFF
,
1535 hammer_done_cursor(&cursor
);
1536 hammer_done_transaction(&trans
);
1541 * hammer_vop_nremove { nch, dvp, cred }
1545 hammer_vop_nremove(struct vop_nremove_args
*ap
)
1547 struct hammer_transaction trans
;
1548 struct hammer_inode
*dip
;
1551 dip
= VTOI(ap
->a_dvp
);
1553 if (hammer_nohistory(dip
) == 0 &&
1554 (error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_REMOVE
)) != 0) {
1558 hammer_start_transaction(&trans
, dip
->hmp
);
1559 ++hammer_stats_file_iopsw
;
1560 error
= hammer_dounlink(&trans
, ap
->a_nch
, ap
->a_dvp
, ap
->a_cred
, 0, 0);
1561 hammer_done_transaction(&trans
);
1563 hammer_knote(ap
->a_dvp
, NOTE_WRITE
);
1568 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1572 hammer_vop_nrename(struct vop_nrename_args
*ap
)
1574 struct hammer_transaction trans
;
1575 struct namecache
*fncp
;
1576 struct namecache
*tncp
;
1577 struct hammer_inode
*fdip
;
1578 struct hammer_inode
*tdip
;
1579 struct hammer_inode
*ip
;
1580 struct hammer_cursor cursor
;
1582 u_int32_t max_iterations
;
1585 if (ap
->a_fdvp
->v_mount
!= ap
->a_tdvp
->v_mount
)
1587 if (ap
->a_fdvp
->v_mount
!= ap
->a_fnch
->ncp
->nc_vp
->v_mount
)
1590 fdip
= VTOI(ap
->a_fdvp
);
1591 tdip
= VTOI(ap
->a_tdvp
);
1592 fncp
= ap
->a_fnch
->ncp
;
1593 tncp
= ap
->a_tnch
->ncp
;
1594 ip
= VTOI(fncp
->nc_vp
);
1595 KKASSERT(ip
!= NULL
);
1597 if (fdip
->obj_localization
!= tdip
->obj_localization
)
1599 if (fdip
->obj_localization
!= ip
->obj_localization
)
1602 if (fdip
->flags
& HAMMER_INODE_RO
)
1604 if (tdip
->flags
& HAMMER_INODE_RO
)
1606 if (ip
->flags
& HAMMER_INODE_RO
)
1608 if ((error
= hammer_checkspace(fdip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0)
1611 hammer_start_transaction(&trans
, fdip
->hmp
);
1612 ++hammer_stats_file_iopsw
;
1615 * Remove tncp from the target directory and then link ip as
1616 * tncp. XXX pass trans to dounlink
1618 * Force the inode sync-time to match the transaction so it is
1619 * in-sync with the creation of the target directory entry.
1621 error
= hammer_dounlink(&trans
, ap
->a_tnch
, ap
->a_tdvp
,
1623 if (error
== 0 || error
== ENOENT
) {
1624 error
= hammer_ip_add_directory(&trans
, tdip
,
1625 tncp
->nc_name
, tncp
->nc_nlen
,
1628 ip
->ino_data
.parent_obj_id
= tdip
->obj_id
;
1629 ip
->ino_data
.ctime
= trans
.time
;
1630 hammer_modify_inode(ip
, HAMMER_INODE_DDIRTY
);
1634 goto failed
; /* XXX */
1637 * Locate the record in the originating directory and remove it.
1639 * Calculate the namekey and setup the key range for the scan. This
1640 * works kinda like a chained hash table where the lower 32 bits
1641 * of the namekey synthesize the chain.
1643 * The key range is inclusive of both key_beg and key_end.
1645 namekey
= hammer_directory_namekey(fdip
, fncp
->nc_name
, fncp
->nc_nlen
,
1648 hammer_init_cursor(&trans
, &cursor
, &fdip
->cache
[1], fdip
);
1649 cursor
.key_beg
.localization
= fdip
->obj_localization
+
1650 HAMMER_LOCALIZE_MISC
;
1651 cursor
.key_beg
.obj_id
= fdip
->obj_id
;
1652 cursor
.key_beg
.key
= namekey
;
1653 cursor
.key_beg
.create_tid
= 0;
1654 cursor
.key_beg
.delete_tid
= 0;
1655 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
1656 cursor
.key_beg
.obj_type
= 0;
1658 cursor
.key_end
= cursor
.key_beg
;
1659 cursor
.key_end
.key
+= max_iterations
;
1660 cursor
.asof
= fdip
->obj_asof
;
1661 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
1664 * Scan all matching records (the chain), locate the one matching
1665 * the requested path component.
1667 * The hammer_ip_*() functions merge in-memory records with on-disk
1668 * records for the purposes of the search.
1670 error
= hammer_ip_first(&cursor
);
1671 while (error
== 0) {
1672 if (hammer_ip_resolve_data(&cursor
) != 0)
1674 nlen
= cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
;
1676 if (fncp
->nc_nlen
== nlen
&&
1677 bcmp(fncp
->nc_name
, cursor
.data
->entry
.name
, nlen
) == 0) {
1680 error
= hammer_ip_next(&cursor
);
1684 * If all is ok we have to get the inode so we can adjust nlinks.
1686 * WARNING: hammer_ip_del_directory() may have to terminate the
1687 * cursor to avoid a recursion. It's ok to call hammer_done_cursor()
1691 error
= hammer_ip_del_directory(&trans
, &cursor
, fdip
, ip
);
1694 * XXX A deadlock here will break rename's atomicity for the purposes
1695 * of crash recovery.
1697 if (error
== EDEADLK
) {
1698 hammer_done_cursor(&cursor
);
1703 * Cleanup and tell the kernel that the rename succeeded.
1705 hammer_done_cursor(&cursor
);
1707 cache_rename(ap
->a_fnch
, ap
->a_tnch
);
1708 hammer_knote(ap
->a_fdvp
, NOTE_WRITE
);
1709 hammer_knote(ap
->a_tdvp
, NOTE_WRITE
);
1711 hammer_knote(ip
->vp
, NOTE_RENAME
);
1715 hammer_done_transaction(&trans
);
1720 * hammer_vop_nrmdir { nch, dvp, cred }
1724 hammer_vop_nrmdir(struct vop_nrmdir_args
*ap
)
1726 struct hammer_transaction trans
;
1727 struct hammer_inode
*dip
;
1730 dip
= VTOI(ap
->a_dvp
);
1732 if (hammer_nohistory(dip
) == 0 &&
1733 (error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_REMOVE
)) != 0) {
1737 hammer_start_transaction(&trans
, dip
->hmp
);
1738 ++hammer_stats_file_iopsw
;
1739 error
= hammer_dounlink(&trans
, ap
->a_nch
, ap
->a_dvp
, ap
->a_cred
, 0, 1);
1740 hammer_done_transaction(&trans
);
1742 hammer_knote(ap
->a_dvp
, NOTE_WRITE
| NOTE_LINK
);
1747 * hammer_vop_markatime { vp, cred }
1751 hammer_vop_markatime(struct vop_markatime_args
*ap
)
1753 struct hammer_transaction trans
;
1754 struct hammer_inode
*ip
;
1756 ip
= VTOI(ap
->a_vp
);
1757 if (ap
->a_vp
->v_mount
->mnt_flag
& MNT_RDONLY
)
1759 if (ip
->flags
& HAMMER_INODE_RO
)
1761 if (ip
->hmp
->mp
->mnt_flag
& MNT_NOATIME
)
1763 hammer_start_transaction(&trans
, ip
->hmp
);
1764 ++hammer_stats_file_iopsw
;
1766 ip
->ino_data
.atime
= trans
.time
;
1767 hammer_modify_inode(ip
, HAMMER_INODE_ATIME
);
1768 hammer_done_transaction(&trans
);
1769 hammer_knote(ap
->a_vp
, NOTE_ATTRIB
);
1774 * hammer_vop_setattr { vp, vap, cred }
1778 hammer_vop_setattr(struct vop_setattr_args
*ap
)
1780 struct hammer_transaction trans
;
1782 struct hammer_inode
*ip
;
1788 int64_t aligned_size
;
1792 ip
= ap
->a_vp
->v_data
;
1796 if (ap
->a_vp
->v_mount
->mnt_flag
& MNT_RDONLY
)
1798 if (ip
->flags
& HAMMER_INODE_RO
)
1800 if (hammer_nohistory(ip
) == 0 &&
1801 (error
= hammer_checkspace(ip
->hmp
, HAMMER_CHKSPC_REMOVE
)) != 0) {
1805 hammer_start_transaction(&trans
, ip
->hmp
);
1806 ++hammer_stats_file_iopsw
;
1809 if (vap
->va_flags
!= VNOVAL
) {
1810 flags
= ip
->ino_data
.uflags
;
1811 error
= vop_helper_setattr_flags(&flags
, vap
->va_flags
,
1812 hammer_to_unix_xid(&ip
->ino_data
.uid
),
1815 if (ip
->ino_data
.uflags
!= flags
) {
1816 ip
->ino_data
.uflags
= flags
;
1817 ip
->ino_data
.ctime
= trans
.time
;
1818 modflags
|= HAMMER_INODE_DDIRTY
;
1819 kflags
|= NOTE_ATTRIB
;
1821 if (ip
->ino_data
.uflags
& (IMMUTABLE
| APPEND
)) {
1828 if (ip
->ino_data
.uflags
& (IMMUTABLE
| APPEND
)) {
1832 if (vap
->va_uid
!= (uid_t
)VNOVAL
|| vap
->va_gid
!= (gid_t
)VNOVAL
) {
1833 mode_t cur_mode
= ip
->ino_data
.mode
;
1834 uid_t cur_uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
1835 gid_t cur_gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
1839 error
= vop_helper_chown(ap
->a_vp
, vap
->va_uid
, vap
->va_gid
,
1841 &cur_uid
, &cur_gid
, &cur_mode
);
1843 hammer_guid_to_uuid(&uuid_uid
, cur_uid
);
1844 hammer_guid_to_uuid(&uuid_gid
, cur_gid
);
1845 if (bcmp(&uuid_uid
, &ip
->ino_data
.uid
,
1846 sizeof(uuid_uid
)) ||
1847 bcmp(&uuid_gid
, &ip
->ino_data
.gid
,
1848 sizeof(uuid_gid
)) ||
1849 ip
->ino_data
.mode
!= cur_mode
1851 ip
->ino_data
.uid
= uuid_uid
;
1852 ip
->ino_data
.gid
= uuid_gid
;
1853 ip
->ino_data
.mode
= cur_mode
;
1854 ip
->ino_data
.ctime
= trans
.time
;
1855 modflags
|= HAMMER_INODE_DDIRTY
;
1857 kflags
|= NOTE_ATTRIB
;
1860 while (vap
->va_size
!= VNOVAL
&& ip
->ino_data
.size
!= vap
->va_size
) {
1861 switch(ap
->a_vp
->v_type
) {
1863 if (vap
->va_size
== ip
->ino_data
.size
)
1866 * XXX break atomicity, we can deadlock the backend
1867 * if we do not release the lock. Probably not a
1870 blksize
= hammer_blocksize(vap
->va_size
);
1871 if (vap
->va_size
< ip
->ino_data
.size
) {
1872 vtruncbuf(ap
->a_vp
, vap
->va_size
, blksize
);
1874 kflags
|= NOTE_WRITE
;
1876 vnode_pager_setsize(ap
->a_vp
, vap
->va_size
);
1878 kflags
|= NOTE_WRITE
| NOTE_EXTEND
;
1880 ip
->ino_data
.size
= vap
->va_size
;
1881 ip
->ino_data
.mtime
= trans
.time
;
1882 modflags
|= HAMMER_INODE_MTIME
| HAMMER_INODE_DDIRTY
;
1885 * on-media truncation is cached in the inode until
1886 * the inode is synchronized.
1889 hammer_ip_frontend_trunc(ip
, vap
->va_size
);
1890 #ifdef DEBUG_TRUNCATE
1891 if (HammerTruncIp
== NULL
)
1894 if ((ip
->flags
& HAMMER_INODE_TRUNCATED
) == 0) {
1895 ip
->flags
|= HAMMER_INODE_TRUNCATED
;
1896 ip
->trunc_off
= vap
->va_size
;
1897 #ifdef DEBUG_TRUNCATE
1898 if (ip
== HammerTruncIp
)
1899 kprintf("truncate1 %016llx\n", ip
->trunc_off
);
1901 } else if (ip
->trunc_off
> vap
->va_size
) {
1902 ip
->trunc_off
= vap
->va_size
;
1903 #ifdef DEBUG_TRUNCATE
1904 if (ip
== HammerTruncIp
)
1905 kprintf("truncate2 %016llx\n", ip
->trunc_off
);
1908 #ifdef DEBUG_TRUNCATE
1909 if (ip
== HammerTruncIp
)
1910 kprintf("truncate3 %016llx (ignored)\n", vap
->va_size
);
1916 * If truncating we have to clean out a portion of
1917 * the last block on-disk. We do this in the
1918 * front-end buffer cache.
1920 aligned_size
= (vap
->va_size
+ (blksize
- 1)) &
1921 ~(int64_t)(blksize
- 1);
1922 if (truncating
&& vap
->va_size
< aligned_size
) {
1926 aligned_size
-= blksize
;
1928 offset
= (int)vap
->va_size
& (blksize
- 1);
1929 error
= bread(ap
->a_vp
, aligned_size
,
1931 hammer_ip_frontend_trunc(ip
, aligned_size
);
1933 bzero(bp
->b_data
+ offset
,
1935 /* must de-cache direct-io offset */
1936 bp
->b_bio2
.bio_offset
= NOOFFSET
;
1939 kprintf("ERROR %d\n", error
);
1945 if ((ip
->flags
& HAMMER_INODE_TRUNCATED
) == 0) {
1946 ip
->flags
|= HAMMER_INODE_TRUNCATED
;
1947 ip
->trunc_off
= vap
->va_size
;
1948 } else if (ip
->trunc_off
> vap
->va_size
) {
1949 ip
->trunc_off
= vap
->va_size
;
1951 hammer_ip_frontend_trunc(ip
, vap
->va_size
);
1952 ip
->ino_data
.size
= vap
->va_size
;
1953 ip
->ino_data
.mtime
= trans
.time
;
1954 modflags
|= HAMMER_INODE_MTIME
| HAMMER_INODE_DDIRTY
;
1955 kflags
|= NOTE_ATTRIB
;
1963 if (vap
->va_atime
.tv_sec
!= VNOVAL
) {
1964 ip
->ino_data
.atime
= hammer_timespec_to_time(&vap
->va_atime
);
1965 modflags
|= HAMMER_INODE_ATIME
;
1966 kflags
|= NOTE_ATTRIB
;
1968 if (vap
->va_mtime
.tv_sec
!= VNOVAL
) {
1969 ip
->ino_data
.mtime
= hammer_timespec_to_time(&vap
->va_mtime
);
1970 modflags
|= HAMMER_INODE_MTIME
;
1971 kflags
|= NOTE_ATTRIB
;
1973 if (vap
->va_mode
!= (mode_t
)VNOVAL
) {
1974 mode_t cur_mode
= ip
->ino_data
.mode
;
1975 uid_t cur_uid
= hammer_to_unix_xid(&ip
->ino_data
.uid
);
1976 gid_t cur_gid
= hammer_to_unix_xid(&ip
->ino_data
.gid
);
1978 error
= vop_helper_chmod(ap
->a_vp
, vap
->va_mode
, ap
->a_cred
,
1979 cur_uid
, cur_gid
, &cur_mode
);
1980 if (error
== 0 && ip
->ino_data
.mode
!= cur_mode
) {
1981 ip
->ino_data
.mode
= cur_mode
;
1982 ip
->ino_data
.ctime
= trans
.time
;
1983 modflags
|= HAMMER_INODE_DDIRTY
;
1984 kflags
|= NOTE_ATTRIB
;
1989 hammer_modify_inode(ip
, modflags
);
1990 hammer_done_transaction(&trans
);
1991 hammer_knote(ap
->a_vp
, kflags
);
1996 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
2000 hammer_vop_nsymlink(struct vop_nsymlink_args
*ap
)
2002 struct hammer_transaction trans
;
2003 struct hammer_inode
*dip
;
2004 struct hammer_inode
*nip
;
2005 struct nchandle
*nch
;
2006 hammer_record_t record
;
2010 ap
->a_vap
->va_type
= VLNK
;
2013 dip
= VTOI(ap
->a_dvp
);
2015 if (dip
->flags
& HAMMER_INODE_RO
)
2017 if ((error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0)
2021 * Create a transaction to cover the operations we perform.
2023 hammer_start_transaction(&trans
, dip
->hmp
);
2024 ++hammer_stats_file_iopsw
;
2027 * Create a new filesystem object of the requested type. The
2028 * returned inode will be referenced but not locked.
2031 error
= hammer_create_inode(&trans
, ap
->a_vap
, ap
->a_cred
,
2034 hammer_done_transaction(&trans
);
2040 * Add a record representing the symlink. symlink stores the link
2041 * as pure data, not a string, and is not \0 terminated.
2044 bytes
= strlen(ap
->a_target
);
2046 if (bytes
<= HAMMER_INODE_BASESYMLEN
) {
2047 bcopy(ap
->a_target
, nip
->ino_data
.ext
.symlink
, bytes
);
2049 record
= hammer_alloc_mem_record(nip
, bytes
);
2050 record
->type
= HAMMER_MEM_RECORD_GENERAL
;
2052 record
->leaf
.base
.localization
= nip
->obj_localization
+
2053 HAMMER_LOCALIZE_MISC
;
2054 record
->leaf
.base
.key
= HAMMER_FIXKEY_SYMLINK
;
2055 record
->leaf
.base
.rec_type
= HAMMER_RECTYPE_FIX
;
2056 record
->leaf
.data_len
= bytes
;
2057 KKASSERT(HAMMER_SYMLINK_NAME_OFF
== 0);
2058 bcopy(ap
->a_target
, record
->data
->symlink
.name
, bytes
);
2059 error
= hammer_ip_add_record(&trans
, record
);
2063 * Set the file size to the length of the link.
2066 nip
->ino_data
.size
= bytes
;
2067 hammer_modify_inode(nip
, HAMMER_INODE_DDIRTY
);
2071 error
= hammer_ip_add_directory(&trans
, dip
, nch
->ncp
->nc_name
,
2072 nch
->ncp
->nc_nlen
, nip
);
2078 hammer_rel_inode(nip
, 0);
2081 error
= hammer_get_vnode(nip
, ap
->a_vpp
);
2082 hammer_rel_inode(nip
, 0);
2084 cache_setunresolved(ap
->a_nch
);
2085 cache_setvp(ap
->a_nch
, *ap
->a_vpp
);
2086 hammer_knote(ap
->a_dvp
, NOTE_WRITE
);
2089 hammer_done_transaction(&trans
);
2094 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
2098 hammer_vop_nwhiteout(struct vop_nwhiteout_args
*ap
)
2100 struct hammer_transaction trans
;
2101 struct hammer_inode
*dip
;
2104 dip
= VTOI(ap
->a_dvp
);
2106 if (hammer_nohistory(dip
) == 0 &&
2107 (error
= hammer_checkspace(dip
->hmp
, HAMMER_CHKSPC_CREATE
)) != 0) {
2111 hammer_start_transaction(&trans
, dip
->hmp
);
2112 ++hammer_stats_file_iopsw
;
2113 error
= hammer_dounlink(&trans
, ap
->a_nch
, ap
->a_dvp
,
2114 ap
->a_cred
, ap
->a_flags
, -1);
2115 hammer_done_transaction(&trans
);
2121 * hammer_vop_ioctl { vp, command, data, fflag, cred }
2125 hammer_vop_ioctl(struct vop_ioctl_args
*ap
)
2127 struct hammer_inode
*ip
= ap
->a_vp
->v_data
;
2129 ++hammer_stats_file_iopsr
;
2130 return(hammer_ioctl(ip
, ap
->a_command
, ap
->a_data
,
2131 ap
->a_fflag
, ap
->a_cred
));
2136 hammer_vop_mountctl(struct vop_mountctl_args
*ap
)
2141 mp
= ap
->a_head
.a_ops
->head
.vv_mount
;
2144 case MOUNTCTL_SET_EXPORT
:
2145 if (ap
->a_ctllen
!= sizeof(struct export_args
))
2148 error
= hammer_vfs_export(mp
, ap
->a_op
,
2149 (const struct export_args
*)ap
->a_ctl
);
2152 error
= journal_mountctl(ap
);
2159 * hammer_vop_strategy { vp, bio }
2161 * Strategy call, used for regular file read & write only. Note that the
2162 * bp may represent a cluster.
2164 * To simplify operation and allow better optimizations in the future,
2165 * this code does not make any assumptions with regards to buffer alignment
2170 hammer_vop_strategy(struct vop_strategy_args
*ap
)
2175 bp
= ap
->a_bio
->bio_buf
;
2179 error
= hammer_vop_strategy_read(ap
);
2182 error
= hammer_vop_strategy_write(ap
);
2185 bp
->b_error
= error
= EINVAL
;
2186 bp
->b_flags
|= B_ERROR
;
2194 * Read from a regular file. Iterate the related records and fill in the
2195 * BIO/BUF. Gaps are zero-filled.
2197 * The support code in hammer_object.c should be used to deal with mixed
2198 * in-memory and on-disk records.
2200 * NOTE: Can be called from the cluster code with an oversized buf.
2206 hammer_vop_strategy_read(struct vop_strategy_args
*ap
)
2208 struct hammer_transaction trans
;
2209 struct hammer_inode
*ip
;
2210 struct hammer_cursor cursor
;
2211 hammer_base_elm_t base
;
2212 hammer_off_t disk_offset
;
2226 ip
= ap
->a_vp
->v_data
;
2229 * The zone-2 disk offset may have been set by the cluster code via
2230 * a BMAP operation, or else should be NOOFFSET.
2232 * Checking the high bits for a match against zone-2 should suffice.
2234 nbio
= push_bio(bio
);
2235 if ((nbio
->bio_offset
& HAMMER_OFF_ZONE_MASK
) ==
2236 HAMMER_ZONE_LARGE_DATA
) {
2237 error
= hammer_io_direct_read(ip
->hmp
, nbio
, NULL
);
2242 * Well, that sucked. Do it the hard way. If all the stars are
2243 * aligned we may still be able to issue a direct-read.
2245 hammer_simple_transaction(&trans
, ip
->hmp
);
2246 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[1], ip
);
2249 * Key range (begin and end inclusive) to scan. Note that the keys
2250 * stored in the actual records represent BASE+LEN, not BASE. The
2251 * first record containing bio_offset will have a key > bio_offset.
2253 cursor
.key_beg
.localization
= ip
->obj_localization
+
2254 HAMMER_LOCALIZE_MISC
;
2255 cursor
.key_beg
.obj_id
= ip
->obj_id
;
2256 cursor
.key_beg
.create_tid
= 0;
2257 cursor
.key_beg
.delete_tid
= 0;
2258 cursor
.key_beg
.obj_type
= 0;
2259 cursor
.key_beg
.key
= bio
->bio_offset
+ 1;
2260 cursor
.asof
= ip
->obj_asof
;
2261 cursor
.flags
|= HAMMER_CURSOR_ASOF
;
2263 cursor
.key_end
= cursor
.key_beg
;
2264 KKASSERT(ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_REGFILE
);
2266 if (ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_DBFILE
) {
2267 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DB
;
2268 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DB
;
2269 cursor
.key_end
.key
= 0x7FFFFFFFFFFFFFFFLL
;
2273 ran_end
= bio
->bio_offset
+ bp
->b_bufsize
;
2274 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DATA
;
2275 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DATA
;
2276 tmp64
= ran_end
+ MAXPHYS
+ 1; /* work-around GCC-4 bug */
2277 if (tmp64
< ran_end
)
2278 cursor
.key_end
.key
= 0x7FFFFFFFFFFFFFFFLL
;
2280 cursor
.key_end
.key
= ran_end
+ MAXPHYS
+ 1;
2282 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
2284 error
= hammer_ip_first(&cursor
);
2287 while (error
== 0) {
2289 * Get the base file offset of the record. The key for
2290 * data records is (base + bytes) rather than (base).
2292 base
= &cursor
.leaf
->base
;
2293 rec_offset
= base
->key
- cursor
.leaf
->data_len
;
2296 * Calculate the gap, if any, and zero-fill it.
2298 * n is the offset of the start of the record versus our
2299 * current seek offset in the bio.
2301 n
= (int)(rec_offset
- (bio
->bio_offset
+ boff
));
2303 if (n
> bp
->b_bufsize
- boff
)
2304 n
= bp
->b_bufsize
- boff
;
2305 bzero((char *)bp
->b_data
+ boff
, n
);
2311 * Calculate the data offset in the record and the number
2312 * of bytes we can copy.
2314 * There are two degenerate cases. First, boff may already
2315 * be at bp->b_bufsize. Secondly, the data offset within
2316 * the record may exceed the record's size.
2320 n
= cursor
.leaf
->data_len
- roff
;
2322 kprintf("strategy_read: bad n=%d roff=%d\n", n
, roff
);
2324 } else if (n
> bp
->b_bufsize
- boff
) {
2325 n
= bp
->b_bufsize
- boff
;
2329 * Deal with cached truncations. This cool bit of code
2330 * allows truncate()/ftruncate() to avoid having to sync
2333 * If the frontend is truncated then all backend records are
2334 * subject to the frontend's truncation.
2336 * If the backend is truncated then backend records on-disk
2337 * (but not in-memory) are subject to the backend's
2338 * truncation. In-memory records owned by the backend
2339 * represent data written after the truncation point on the
2340 * backend and must not be truncated.
2342 * Truncate operations deal with frontend buffer cache
2343 * buffers and frontend-owned in-memory records synchronously.
2345 if (ip
->flags
& HAMMER_INODE_TRUNCATED
) {
2346 if (hammer_cursor_ondisk(&cursor
) ||
2347 cursor
.iprec
->flush_state
== HAMMER_FST_FLUSH
) {
2348 if (ip
->trunc_off
<= rec_offset
)
2350 else if (ip
->trunc_off
< rec_offset
+ n
)
2351 n
= (int)(ip
->trunc_off
- rec_offset
);
2354 if (ip
->sync_flags
& HAMMER_INODE_TRUNCATED
) {
2355 if (hammer_cursor_ondisk(&cursor
)) {
2356 if (ip
->sync_trunc_off
<= rec_offset
)
2358 else if (ip
->sync_trunc_off
< rec_offset
+ n
)
2359 n
= (int)(ip
->sync_trunc_off
- rec_offset
);
2364 * Try to issue a direct read into our bio if possible,
2365 * otherwise resolve the element data into a hammer_buffer
2368 * The buffer on-disk should be zeroed past any real
2369 * truncation point, but may not be for any synthesized
2370 * truncation point from above.
2372 disk_offset
= cursor
.leaf
->data_offset
+ roff
;
2373 if (boff
== 0 && n
== bp
->b_bufsize
&&
2374 hammer_cursor_ondisk(&cursor
) &&
2375 (disk_offset
& HAMMER_BUFMASK
) == 0) {
2376 KKASSERT((disk_offset
& HAMMER_OFF_ZONE_MASK
) ==
2377 HAMMER_ZONE_LARGE_DATA
);
2378 nbio
->bio_offset
= disk_offset
;
2379 error
= hammer_io_direct_read(trans
.hmp
, nbio
,
2383 error
= hammer_ip_resolve_data(&cursor
);
2385 bcopy((char *)cursor
.data
+ roff
,
2386 (char *)bp
->b_data
+ boff
, n
);
2393 * Iterate until we have filled the request.
2396 if (boff
== bp
->b_bufsize
)
2398 error
= hammer_ip_next(&cursor
);
2402 * There may have been a gap after the last record
2404 if (error
== ENOENT
)
2406 if (error
== 0 && boff
!= bp
->b_bufsize
) {
2407 KKASSERT(boff
< bp
->b_bufsize
);
2408 bzero((char *)bp
->b_data
+ boff
, bp
->b_bufsize
- boff
);
2409 /* boff = bp->b_bufsize; */
2412 bp
->b_error
= error
;
2414 bp
->b_flags
|= B_ERROR
;
2419 hammer_cache_node(&ip
->cache
[1], cursor
.node
);
2420 hammer_done_cursor(&cursor
);
2421 hammer_done_transaction(&trans
);
2426 * BMAP operation - used to support cluster_read() only.
2428 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
2430 * This routine may return EOPNOTSUPP if the operation is not supported for
2431 * the specified offset. The contents of the pointer arguments do not
2432 * need to be initialized in that case.
2434 * If a disk address is available and properly aligned return 0 with
2435 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately
2436 * to the run-length relative to that offset. Callers may assume that
2437 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently
2438 * large, so return EOPNOTSUPP if it is not sufficiently large.
2442 hammer_vop_bmap(struct vop_bmap_args
*ap
)
2444 struct hammer_transaction trans
;
2445 struct hammer_inode
*ip
;
2446 struct hammer_cursor cursor
;
2447 hammer_base_elm_t base
;
2451 int64_t base_offset
;
2452 int64_t base_disk_offset
;
2453 int64_t last_offset
;
2454 hammer_off_t last_disk_offset
;
2455 hammer_off_t disk_offset
;
2460 ++hammer_stats_file_iopsr
;
2461 ip
= ap
->a_vp
->v_data
;
2464 * We can only BMAP regular files. We can't BMAP database files,
2467 if (ip
->ino_data
.obj_type
!= HAMMER_OBJTYPE_REGFILE
)
2471 * bmap is typically called with runp/runb both NULL when used
2472 * for writing. We do not support BMAP for writing atm.
2474 if (ap
->a_cmd
!= BUF_CMD_READ
)
2478 * Scan the B-Tree to acquire blockmap addresses, then translate
2481 hammer_simple_transaction(&trans
, ip
->hmp
);
2483 kprintf("bmap_beg %016llx ip->cache %p\n", ap
->a_loffset
, ip
->cache
[1]);
2485 hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[1], ip
);
2488 * Key range (begin and end inclusive) to scan. Note that the keys
2489 * stored in the actual records represent BASE+LEN, not BASE. The
2490 * first record containing bio_offset will have a key > bio_offset.
2492 cursor
.key_beg
.localization
= ip
->obj_localization
+
2493 HAMMER_LOCALIZE_MISC
;
2494 cursor
.key_beg
.obj_id
= ip
->obj_id
;
2495 cursor
.key_beg
.create_tid
= 0;
2496 cursor
.key_beg
.delete_tid
= 0;
2497 cursor
.key_beg
.obj_type
= 0;
2499 cursor
.key_beg
.key
= ap
->a_loffset
- MAXPHYS
+ 1;
2501 cursor
.key_beg
.key
= ap
->a_loffset
+ 1;
2502 if (cursor
.key_beg
.key
< 0)
2503 cursor
.key_beg
.key
= 0;
2504 cursor
.asof
= ip
->obj_asof
;
2505 cursor
.flags
|= HAMMER_CURSOR_ASOF
;
2507 cursor
.key_end
= cursor
.key_beg
;
2508 KKASSERT(ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_REGFILE
);
2510 ran_end
= ap
->a_loffset
+ MAXPHYS
;
2511 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DATA
;
2512 cursor
.key_end
.rec_type
= HAMMER_RECTYPE_DATA
;
2513 tmp64
= ran_end
+ MAXPHYS
+ 1; /* work-around GCC-4 bug */
2514 if (tmp64
< ran_end
)
2515 cursor
.key_end
.key
= 0x7FFFFFFFFFFFFFFFLL
;
2517 cursor
.key_end
.key
= ran_end
+ MAXPHYS
+ 1;
2519 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
;
2521 error
= hammer_ip_first(&cursor
);
2522 base_offset
= last_offset
= 0;
2523 base_disk_offset
= last_disk_offset
= 0;
2525 while (error
== 0) {
2527 * Get the base file offset of the record. The key for
2528 * data records is (base + bytes) rather than (base).
2530 * NOTE: rec_offset + rec_len may exceed the end-of-file.
2531 * The extra bytes should be zero on-disk and the BMAP op
2532 * should still be ok.
2534 base
= &cursor
.leaf
->base
;
2535 rec_offset
= base
->key
- cursor
.leaf
->data_len
;
2536 rec_len
= cursor
.leaf
->data_len
;
2539 * Incorporate any cached truncation.
2541 * NOTE: Modifications to rec_len based on synthesized
2542 * truncation points remove the guarantee that any extended
2543 * data on disk is zero (since the truncations may not have
2544 * taken place on-media yet).
2546 if (ip
->flags
& HAMMER_INODE_TRUNCATED
) {
2547 if (hammer_cursor_ondisk(&cursor
) ||
2548 cursor
.iprec
->flush_state
== HAMMER_FST_FLUSH
) {
2549 if (ip
->trunc_off
<= rec_offset
)
2551 else if (ip
->trunc_off
< rec_offset
+ rec_len
)
2552 rec_len
= (int)(ip
->trunc_off
- rec_offset
);
2555 if (ip
->sync_flags
& HAMMER_INODE_TRUNCATED
) {
2556 if (hammer_cursor_ondisk(&cursor
)) {
2557 if (ip
->sync_trunc_off
<= rec_offset
)
2559 else if (ip
->sync_trunc_off
< rec_offset
+ rec_len
)
2560 rec_len
= (int)(ip
->sync_trunc_off
- rec_offset
);
2565 * Accumulate information. If we have hit a discontiguous
2566 * block reset base_offset unless we are already beyond the
2567 * requested offset. If we are, that's it, we stop.
2571 if (hammer_cursor_ondisk(&cursor
)) {
2572 disk_offset
= cursor
.leaf
->data_offset
;
2573 if (rec_offset
!= last_offset
||
2574 disk_offset
!= last_disk_offset
) {
2575 if (rec_offset
> ap
->a_loffset
)
2577 base_offset
= rec_offset
;
2578 base_disk_offset
= disk_offset
;
2580 last_offset
= rec_offset
+ rec_len
;
2581 last_disk_offset
= disk_offset
+ rec_len
;
2583 error
= hammer_ip_next(&cursor
);
2587 kprintf("BMAP %016llx: %016llx - %016llx\n",
2588 ap
->a_loffset
, base_offset
, last_offset
);
2589 kprintf("BMAP %16s: %016llx - %016llx\n",
2590 "", base_disk_offset
, last_disk_offset
);
2594 hammer_cache_node(&ip
->cache
[1], cursor
.node
);
2596 kprintf("bmap_end2 %016llx ip->cache %p\n", ap
->a_loffset
, ip
->cache
[1]);
2599 hammer_done_cursor(&cursor
);
2600 hammer_done_transaction(&trans
);
2603 * If we couldn't find any records or the records we did find were
2604 * all behind the requested offset, return failure. A forward
2605 * truncation can leave a hole w/ no on-disk records.
2607 if (last_offset
== 0 || last_offset
< ap
->a_loffset
)
2608 return (EOPNOTSUPP
);
2611 * Figure out the block size at the requested offset and adjust
2612 * our limits so the cluster_read() does not create inappropriately
2613 * sized buffer cache buffers.
2615 blksize
= hammer_blocksize(ap
->a_loffset
);
2616 if (hammer_blocksize(base_offset
) != blksize
) {
2617 base_offset
= hammer_blockdemarc(base_offset
, ap
->a_loffset
);
2619 if (last_offset
!= ap
->a_loffset
&&
2620 hammer_blocksize(last_offset
- 1) != blksize
) {
2621 last_offset
= hammer_blockdemarc(ap
->a_loffset
,
2626 * Returning EOPNOTSUPP simply prevents the direct-IO optimization
2629 disk_offset
= base_disk_offset
+ (ap
->a_loffset
- base_offset
);
2631 if ((disk_offset
& HAMMER_OFF_ZONE_MASK
) != HAMMER_ZONE_LARGE_DATA
) {
2633 * Only large-data zones can be direct-IOd
2636 } else if ((disk_offset
& HAMMER_BUFMASK
) ||
2637 (last_offset
- ap
->a_loffset
) < blksize
) {
2639 * doffsetp is not aligned or the forward run size does
2640 * not cover a whole buffer, disallow the direct I/O.
2647 *ap
->a_doffsetp
= disk_offset
;
2649 *ap
->a_runb
= ap
->a_loffset
- base_offset
;
2650 KKASSERT(*ap
->a_runb
>= 0);
2653 *ap
->a_runp
= last_offset
- ap
->a_loffset
;
2654 KKASSERT(*ap
->a_runp
>= 0);
2662 * Write to a regular file. Because this is a strategy call the OS is
2663 * trying to actually get data onto the media.
/*
 * hammer_vop_strategy_write(ap): strategy-level write of one buffer.
 *
 * NOTE(review): only part of this function's statements are visible in
 * the reviewed text (several lines, including returns, are missing), so
 * the summary below covers just what the visible lines establish.
 *
 *  - ip is taken from ap->a_vp->v_data.
 *  - The block size for bio->bio_offset is computed and asserted to be
 *    equal to bp->b_bufsize.
 *  - If HAMMER_INODE_RO is set on the inode, the buffer is marked with
 *    b_error = EROFS and B_ERROR.
 *  - HAMMER_INODE_DELETING / HAMMER_INODE_DELETED are tested against the
 *    union of ip->flags and ip->sync_flags (handling not visible here).
 *  - bio_offset is asserted to be HAMMER_BUFMASK-aligned and below the
 *    inode size.
 *  - The result of hammer_ip_add_bulk() is stored in record and passed
 *    to hammer_io_direct_write(); hammer_flush_inode(ip, 0) is called
 *    when ip->rsv_recs > 1 and hmp->rsv_recs > hammer_limit_recs.
 *  - The remaining visible lines set b_bio2.bio_offset to NOOFFSET and
 *    flag the buffer with error / B_ERROR; presumably this is the
 *    failure branch of the record allocation -- TODO confirm.
 */
2667 hammer_vop_strategy_write(struct vop_strategy_args
*ap
)
2669 hammer_record_t record
;
2680 ip
= ap
->a_vp
->v_data
;
2683 blksize
= hammer_blocksize(bio
->bio_offset
);
2684 KKASSERT(bp
->b_bufsize
== blksize
);
/* Read-only inode: the buffer is flagged with EROFS / B_ERROR. */
2686 if (ip
->flags
& HAMMER_INODE_RO
) {
2687 bp
->b_error
= EROFS
;
2688 bp
->b_flags
|= B_ERROR
;
2694 * Interlock with inode destruction (no in-kernel or directory
2695 * topology visibility). If we queue new IO while trying to
2696 * destroy the inode we can deadlock the vtrunc call in
2697 * hammer_inode_unloadable_check().
2699 * Besides, there's no point flushing a bp associated with an
2700 * inode that is being destroyed on-media and has no kernel
2703 if ((ip
->flags
| ip
->sync_flags
) &
2704 (HAMMER_INODE_DELETING
|HAMMER_INODE_DELETED
)) {
2711 * Reserve space and issue a direct-write from the front-end.
2712 * NOTE: The direct_io code will hammer_bread/bcopy smaller
2715 * An in-memory record will be installed to reference the storage
2716 * until the flusher can get to it.
2718 * Since we own the high level bio the front-end will not try to
2719 * do a direct-read until the write completes.
2721 * NOTE: The only time we do not reserve a full-sized buffers
2722 * worth of data is if the file is small. We do not try to
2723 * allocate a fragment (from the small-data zone) at the end of
2724 * an otherwise large file as this can lead to wildly separated
2727 KKASSERT((bio
->bio_offset
& HAMMER_BUFMASK
) == 0);
2728 KKASSERT(bio
->bio_offset
< ip
->ino_data
.size
);
/* Non-zero offset, or a file larger than half a buffer: whole buffer. */
2729 if (bio
->bio_offset
|| ip
->ino_data
.size
> HAMMER_BUFSIZE
/ 2)
2730 bytes
= bp
->b_bufsize
;
/* Alternative size: the inode size rounded up to a multiple of 16. */
2732 bytes
= ((int)ip
->ino_data
.size
+ 15) & ~15;
2734 record
= hammer_ip_add_bulk(ip
, bio
->bio_offset
, bp
->b_data
,
2737 hammer_io_direct_write(hmp
, record
, bio
);
2738 if (ip
->rsv_recs
> 1 && hmp
->rsv_recs
> hammer_limit_recs
)
2739 hammer_flush_inode(ip
, 0);
2741 bp
->b_bio2
.bio_offset
= NOOFFSET
;
2742 bp
->b_error
= error
;
2743 bp
->b_flags
|= B_ERROR
;
2750 * dounlink - disconnect a directory entry
2752 * XXX whiteout support not really in yet
/*
 * hammer_dounlink(trans, nch, dvp, cred, flags, isdir): remove the
 * directory entry named by the namecache handle.
 *
 * NOTE(review): only part of this function's statements are visible in
 * the reviewed text, so the summary below covers just what the visible
 * lines establish.
 *
 *  - HAMMER_INODE_RO is tested on the directory inode (dip).
 *  - The name key is derived with hammer_directory_namekey() from
 *    ncp->nc_name / ncp->nc_nlen, and a cursor is set up over the
 *    inclusive key range [namekey, namekey + max_iterations] for
 *    HAMMER_RECTYPE_DIRENTRY records, as-of dip->obj_asof.
 *  - The scan (hammer_ip_first / hammer_ip_next) resolves each record's
 *    data and compares its name by length and bcmp() against the
 *    namecache name.
 *  - The entry's inode is fetched with hammer_get_inode(); ENOENT from
 *    that step produces a kprintf warning about a dirent with a missing
 *    inode.
 *  - isdir >= 0 requests validation of the entry's obj_type against
 *    HAMMER_OBJTYPE_DIRECTORY; a directory target is additionally
 *    checked with hammer_ip_check_directory_empty().
 *  - The entry is deleted with hammer_ip_del_directory(); afterwards the
 *    namecache handle is set unresolved and bound to a NULL vnode, and
 *    ip->vp receives NOTE_DELETE and cache_inval_vp(CINV_DESTROY).
 *  - EDEADLK is tested at the end; presumably this restarts the
 *    operation -- TODO confirm against the full source.
 */
2755 hammer_dounlink(hammer_transaction_t trans
, struct nchandle
*nch
,
2756 struct vnode
*dvp
, struct ucred
*cred
,
2757 int flags
, int isdir
)
2759 struct namecache
*ncp
;
2762 struct hammer_cursor cursor
;
2764 u_int32_t max_iterations
;
2768 * Calculate the namekey and setup the key range for the scan. This
2769 * works kinda like a chained hash table where the lower 32 bits
2770 * of the namekey synthesize the chain.
2772 * The key range is inclusive of both key_beg and key_end.
2777 if (dip
->flags
& HAMMER_INODE_RO
)
2780 namekey
= hammer_directory_namekey(dip
, ncp
->nc_name
, ncp
->nc_nlen
,
2783 hammer_init_cursor(trans
, &cursor
, &dip
->cache
[1], dip
);
2784 cursor
.key_beg
.localization
= dip
->obj_localization
+
2785 HAMMER_LOCALIZE_MISC
;
2786 cursor
.key_beg
.obj_id
= dip
->obj_id
;
2787 cursor
.key_beg
.key
= namekey
;
2788 cursor
.key_beg
.create_tid
= 0;
2789 cursor
.key_beg
.delete_tid
= 0;
2790 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_DIRENTRY
;
2791 cursor
.key_beg
.obj_type
= 0;
/* key_end is a copy of key_beg with the key advanced by max_iterations. */
2793 cursor
.key_end
= cursor
.key_beg
;
2794 cursor
.key_end
.key
+= max_iterations
;
2795 cursor
.asof
= dip
->obj_asof
;
2796 cursor
.flags
|= HAMMER_CURSOR_END_INCLUSIVE
| HAMMER_CURSOR_ASOF
;
2799 * Scan all matching records (the chain), locate the one matching
2800 * the requested path component. info->last_error contains the
2801 * error code on search termination and could be 0, ENOENT, or
2804 * The hammer_ip_*() functions merge in-memory records with on-disk
2805 * records for the purposes of the search.
2807 error
= hammer_ip_first(&cursor
);
2809 while (error
== 0) {
2810 error
= hammer_ip_resolve_data(&cursor
);
2813 nlen
= cursor
.leaf
->data_len
- HAMMER_ENTRY_NAME_OFF
;
2815 if (ncp
->nc_nlen
== nlen
&&
2816 bcmp(ncp
->nc_name
, cursor
.data
->entry
.name
, nlen
) == 0) {
2819 error
= hammer_ip_next(&cursor
);
2823 * If all is ok we have to get the inode so we can adjust nlinks.
2824 * To avoid a deadlock with the flusher we must release the inode
2825 * lock on the directory when acquiring the inode for the entry.
2827 * If the target is a directory, it must be empty.
/* cursor.ip->lock is dropped around hammer_get_inode(), then re-taken shared. */
2830 hammer_unlock(&cursor
.ip
->lock
);
2831 ip
= hammer_get_inode(trans
, dip
, cursor
.data
->entry
.obj_id
,
2833 cursor
.data
->entry
.localization
,
2835 hammer_lock_sh(&cursor
.ip
->lock
);
2836 if (error
== ENOENT
) {
2837 kprintf("HAMMER: WARNING: Removing "
2838 "dirent w/missing inode \"%s\"\n"
2839 "\tobj_id = %016llx\n",
2841 (long long)cursor
.data
->entry
.obj_id
);
2846 * If isdir >= 0 we validate that the entry is or is not a
2847 * directory. If isdir < 0 we don't care.
2849 if (error
== 0 && isdir
>= 0 && ip
) {
2851 ip
->ino_data
.obj_type
!= HAMMER_OBJTYPE_DIRECTORY
) {
2853 } else if (isdir
== 0 &&
2854 ip
->ino_data
.obj_type
== HAMMER_OBJTYPE_DIRECTORY
) {
2860 * If we are trying to remove a directory the directory must
2863 * The check directory code can loop and deadlock/retry. Our
2864 * own cursor's node locks must be released to avoid a 3-way
2865 * deadlock with the flusher if the check directory code
2868 * If any changes whatsoever have been made to the cursor
2869 * set EDEADLK and retry.
2871 if (error
== 0 && ip
&& ip
->ino_data
.obj_type
==
2872 HAMMER_OBJTYPE_DIRECTORY
) {
/* The cursor is unlocked while the emptiness check runs, then re-locked. */
2873 hammer_unlock_cursor(&cursor
);
2874 error
= hammer_ip_check_directory_empty(trans
, ip
);
2875 hammer_lock_cursor(&cursor
);
2876 if (cursor
.flags
& HAMMER_CURSOR_RETEST
) {
2877 kprintf("HAMMER: Warning: avoided deadlock "
2885 * Delete the directory entry.
2887 * WARNING: hammer_ip_del_directory() may have to terminate
2888 * the cursor to avoid a deadlock. It is ok to call
2889 * hammer_done_cursor() twice.
2892 error
= hammer_ip_del_directory(trans
, &cursor
,
2895 hammer_done_cursor(&cursor
);
/* The namecache handle is marked unresolved, then bound to a NULL vnode. */
2897 cache_setunresolved(nch
);
2898 cache_setvp(nch
, NULL
);
2901 hammer_knote(ip
->vp
, NOTE_DELETE
);
2902 cache_inval_vp(ip
->vp
, CINV_DESTROY
);
2906 hammer_rel_inode(ip
, 0);
2908 hammer_done_cursor(&cursor
);
2910 if (error
== EDEADLK
)
2916 /************************************************************************
2917 * FIFO AND SPECFS OPS *
2918 ************************************************************************
/*
 * hammer_vop_fifoclose(ap): close on a FIFO vnode.
 *
 * Dispatches ap->a_head through fifo_vnode_vops via VOCALL() and returns
 * that result unchanged.  Inode times are not updated here (see the XXX
 * note below).
 */
2923 hammer_vop_fifoclose (struct vop_close_args
*ap
)
2925 /* XXX update itimes */
2926 return (VOCALL(&fifo_vnode_vops
, &ap
->a_head
));
/*
 * hammer_vop_fiforead(ap): read on a FIFO vnode.
 *
 * Dispatches ap->a_head through fifo_vnode_vops via VOCALL() and stores
 * the result in error.  The access time is not updated (XXX below).
 * NOTE(review): the return statement is not visible in the reviewed
 * text; presumably error is returned -- confirm.
 */
2930 hammer_vop_fiforead (struct vop_read_args
*ap
)
2934 error
= VOCALL(&fifo_vnode_vops
, &ap
->a_head
);
2935 /* XXX update access time */
/*
 * hammer_vop_fifowrite(ap): write on a FIFO vnode.
 *
 * Dispatches ap->a_head through fifo_vnode_vops via VOCALL() and stores
 * the result in error.  No timestamp is updated (XXX below).
 * NOTE(review): the return statement is not visible in the reviewed
 * text; presumably error is returned -- confirm.
 */
2940 hammer_vop_fifowrite (struct vop_write_args
*ap
)
2944 error
= VOCALL(&fifo_vnode_vops
, &ap
->a_head
);
2945 /* XXX update access time */
/*
 * hammer_vop_fifokqfilter(ap): kqfilter on a FIFO vnode.
 *
 * Two calls are visible: the request is dispatched through
 * fifo_vnode_vops via VOCALL(), and hammer_vop_kqfilter(ap) is also
 * invoked, each storing its result in error.
 * NOTE(review): the condition between the two calls is not visible in
 * the reviewed text; presumably hammer_vop_kqfilter() is a fallback used
 * only when the fifo layer reports an error -- confirm.
 */
2951 hammer_vop_fifokqfilter(struct vop_kqfilter_args
*ap
)
2955 error
= VOCALL(&fifo_vnode_vops
, &ap
->a_head
);
2957 error
= hammer_vop_kqfilter(ap
);
/*
 * hammer_vop_specclose(ap): close on a special-device vnode.
 *
 * Dispatches ap->a_head through spec_vnode_vops via VOCALL() and returns
 * that result unchanged.  Inode times are not updated here (XXX below).
 */
2962 hammer_vop_specclose (struct vop_close_args
*ap
)
2964 /* XXX update itimes */
2965 return (VOCALL(&spec_vnode_vops
, &ap
->a_head
));
/*
 * hammer_vop_specread(ap): read on a special-device vnode.
 *
 * Dispatches ap->a_head through spec_vnode_vops via VOCALL() and returns
 * that result unchanged.  The access time is not updated (XXX below).
 */
2969 hammer_vop_specread (struct vop_read_args
*ap
)
2971 /* XXX update access time */
2972 return (VOCALL(&spec_vnode_vops
, &ap
->a_head
));
/*
 * hammer_vop_specwrite(ap): write on a special-device vnode.
 *
 * Dispatches ap->a_head through spec_vnode_vops via VOCALL() and returns
 * that result unchanged.  The last-change time is not updated (XXX
 * below).
 */
2976 hammer_vop_specwrite (struct vop_write_args
*ap
)
2978 /* XXX update last change time */
2979 return (VOCALL(&spec_vnode_vops
, &ap
->a_head
));
2982 /************************************************************************
2984 ************************************************************************
/*
 * Forward declarations of the knote filter callbacks, followed by the
 * three filterops tables that hammer_vop_kqfilter() installs in
 * kn->kn_fop.  All three tables share filt_hammerdetach and a NULL
 * second slot; they differ only in the last (event) callback.
 * NOTE(review): the meaning of the leading 1 depends on the layout of
 * struct filterops, which is not visible here -- presumably the
 * "is a file descriptor" flag; confirm against <sys/event.h>.
 */
2987 static void filt_hammerdetach(struct knote
*kn
);
2988 static int filt_hammerread(struct knote
*kn
, long hint
);
2989 static int filt_hammerwrite(struct knote
*kn
, long hint
);
2990 static int filt_hammervnode(struct knote
*kn
, long hint
);
2992 static struct filterops hammerread_filtops
=
2993 { 1, NULL
, filt_hammerdetach
, filt_hammerread
};
2994 static struct filterops hammerwrite_filtops
=
2995 { 1, NULL
, filt_hammerdetach
, filt_hammerwrite
};
2996 static struct filterops hammervnode_filtops
=
2997 { 1, NULL
, filt_hammerdetach
, filt_hammervnode
};
/*
 * hammer_vop_kqfilter(ap): attach a knote to a HAMMER vnode.
 *
 * Selects one of hammerread_filtops / hammerwrite_filtops /
 * hammervnode_filtops based on kn->kn_filter, records the vnode in
 * kn->kn_hook, and links the knote at the head of the vnode's
 * v_pollinfo.vpi_selinfo.si_note list.
 * NOTE(review): the case labels and the handling of unrecognised filter
 * values are not visible in the reviewed text.
 */
3001 hammer_vop_kqfilter(struct vop_kqfilter_args
*ap
)
3003 struct vnode
*vp
= ap
->a_vp
;
3004 struct knote
*kn
= ap
->a_kn
;
3007 switch (kn
->kn_filter
) {
3009 kn
->kn_fop
= &hammerread_filtops
;
3012 kn
->kn_fop
= &hammerwrite_filtops
;
3015 kn
->kn_fop
= &hammervnode_filtops
;
/* The vnode is stashed in kn_hook; the filter callbacks read it back. */
3021 kn
->kn_hook
= (caddr_t
)vp
;
/* The list insert happens between lwkt_gettoken() and lwkt_reltoken(). */
3023 lwkt_gettoken(&ilock
, &vp
->v_pollinfo
.vpi_token
);
3024 SLIST_INSERT_HEAD(&vp
->v_pollinfo
.vpi_selinfo
.si_note
, kn
, kn_selnext
);
3025 lwkt_reltoken(&ilock
);
/*
 * filt_hammerdetach(kn): detach callback shared by all three filterops
 * tables.
 *
 * Recovers the vnode from kn->kn_hook and removes the knote from the
 * vnode's v_pollinfo.vpi_selinfo.si_note list, with the removal done
 * between lwkt_gettoken() and lwkt_reltoken() on vpi_token.
 */
3031 filt_hammerdetach(struct knote
*kn
)
3033 struct vnode
*vp
= (void *)kn
->kn_hook
;
3036 lwkt_gettoken(&ilock
, &vp
->v_pollinfo
.vpi_token
);
3037 SLIST_REMOVE(&vp
->v_pollinfo
.vpi_selinfo
.si_note
,
3038 kn
, knote
, kn_selnext
);
3039 lwkt_reltoken(&ilock
);
/*
 * filt_hammerread(kn, hint): event callback of hammerread_filtops.
 *
 * On NOTE_REVOKE the knote is flagged EV_EOF | EV_ONESHOT.  Otherwise
 * kn_data is set to the inode size minus the file's current offset
 * (kn->kn_fp->f_offset) and the filter reports active when that
 * difference is non-zero.
 * NOTE(review): the statement that ends the NOTE_REVOKE branch is not
 * visible in the reviewed text.
 */
3043 filt_hammerread(struct knote
*kn
, long hint
)
3045 struct vnode
*vp
= (void *)kn
->kn_hook
;
3046 hammer_inode_t ip
= VTOI(vp
);
3048 if (hint
== NOTE_REVOKE
) {
3049 kn
->kn_flags
|= (EV_EOF
| EV_ONESHOT
);
3052 kn
->kn_data
= ip
->ino_data
.size
- kn
->kn_fp
->f_offset
;
3053 return (kn
->kn_data
!= 0);
/*
 * filt_hammerwrite(kn, hint): event callback of hammerwrite_filtops.
 *
 * On NOTE_REVOKE the knote is flagged EV_EOF | EV_ONESHOT.
 * NOTE(review): the remaining statements, including the return value,
 * are not visible in the reviewed text.
 */
3057 filt_hammerwrite(struct knote
*kn
, long hint
)
3059 if (hint
== NOTE_REVOKE
)
3060 kn
->kn_flags
|= (EV_EOF
| EV_ONESHOT
);
/*
 * filt_hammervnode(kn, hint): event callback of hammervnode_filtops.
 *
 * When hint has any bit in common with kn_sfflags, hint is OR-ed into
 * kn_fflags.  NOTE_REVOKE additionally sets EV_EOF on the knote.  The
 * final visible statement reports the knote active when kn_fflags is
 * non-zero.
 * NOTE(review): the statement that ends the NOTE_REVOKE branch is not
 * visible in the reviewed text.
 */
3066 filt_hammervnode(struct knote
*kn
, long hint
)
3068 if (kn
->kn_sfflags
& hint
)
3069 kn
->kn_fflags
|= hint
;
3070 if (hint
== NOTE_REVOKE
) {
3071 kn
->kn_flags
|= EV_EOF
;
3074 return (kn
->kn_fflags
!= 0);