Implement NFS support and export control for HAMMER.
sys/vfs/hammer/hammer_vnops.c
/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.27 2008/02/05 20:52:01 dillon Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/namecache.h>
#include <sys/vnode.h>
#include <sys/lockf.h>
#include <sys/event.h>
#include <sys/stat.h>
#include <sys/dirent.h>
#include <vm/vm_extern.h>
#include <vfs/fifofs/fifo.h>
#include "hammer.h"

/*
 * USERFS VNOPS
 */
/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
static int hammer_vop_fsync(struct vop_fsync_args *);
static int hammer_vop_read(struct vop_read_args *);
static int hammer_vop_write(struct vop_write_args *);
static int hammer_vop_access(struct vop_access_args *);
static int hammer_vop_advlock(struct vop_advlock_args *);
static int hammer_vop_close(struct vop_close_args *);
static int hammer_vop_ncreate(struct vop_ncreate_args *);
static int hammer_vop_getattr(struct vop_getattr_args *);
static int hammer_vop_nresolve(struct vop_nresolve_args *);
static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
static int hammer_vop_nlink(struct vop_nlink_args *);
static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
static int hammer_vop_nmknod(struct vop_nmknod_args *);
static int hammer_vop_open(struct vop_open_args *);
static int hammer_vop_pathconf(struct vop_pathconf_args *);
static int hammer_vop_print(struct vop_print_args *);
static int hammer_vop_readdir(struct vop_readdir_args *);
static int hammer_vop_readlink(struct vop_readlink_args *);
static int hammer_vop_nremove(struct vop_nremove_args *);
static int hammer_vop_nrename(struct vop_nrename_args *);
static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
static int hammer_vop_setattr(struct vop_setattr_args *);
static int hammer_vop_strategy(struct vop_strategy_args *);
static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
static int hammer_vop_ioctl(struct vop_ioctl_args *);
static int hammer_vop_mountctl(struct vop_mountctl_args *);

static int hammer_vop_fifoclose (struct vop_close_args *);
static int hammer_vop_fiforead (struct vop_read_args *);
static int hammer_vop_fifowrite (struct vop_write_args *);

static int hammer_vop_specclose (struct vop_close_args *);
static int hammer_vop_specread (struct vop_read_args *);
static int hammer_vop_specwrite (struct vop_write_args *);

struct vop_ops hammer_vnode_vops = {
        .vop_default =          vop_defaultop,
        .vop_fsync =            hammer_vop_fsync,
        .vop_getpages =         vop_stdgetpages,
        .vop_putpages =         vop_stdputpages,
        .vop_read =             hammer_vop_read,
        .vop_write =            hammer_vop_write,
        .vop_access =           hammer_vop_access,
        .vop_advlock =          hammer_vop_advlock,
        .vop_close =            hammer_vop_close,
        .vop_ncreate =          hammer_vop_ncreate,
        .vop_getattr =          hammer_vop_getattr,
        .vop_inactive =         hammer_vop_inactive,
        .vop_reclaim =          hammer_vop_reclaim,
        .vop_nresolve =         hammer_vop_nresolve,
        .vop_nlookupdotdot =    hammer_vop_nlookupdotdot,
        .vop_nlink =            hammer_vop_nlink,
        .vop_nmkdir =           hammer_vop_nmkdir,
        .vop_nmknod =           hammer_vop_nmknod,
        .vop_open =             hammer_vop_open,
        .vop_pathconf =         hammer_vop_pathconf,
        .vop_print =            hammer_vop_print,
        .vop_readdir =          hammer_vop_readdir,
        .vop_readlink =         hammer_vop_readlink,
        .vop_nremove =          hammer_vop_nremove,
        .vop_nrename =          hammer_vop_nrename,
        .vop_nrmdir =           hammer_vop_nrmdir,
        .vop_setattr =          hammer_vop_setattr,
        .vop_strategy =         hammer_vop_strategy,
        .vop_nsymlink =         hammer_vop_nsymlink,
        .vop_nwhiteout =        hammer_vop_nwhiteout,
        .vop_ioctl =            hammer_vop_ioctl,
        .vop_mountctl =         hammer_vop_mountctl
};

struct vop_ops hammer_spec_vops = {
        .vop_default =          spec_vnoperate,
        .vop_fsync =            hammer_vop_fsync,
        .vop_read =             hammer_vop_specread,
        .vop_write =            hammer_vop_specwrite,
        .vop_access =           hammer_vop_access,
        .vop_close =            hammer_vop_specclose,
        .vop_getattr =          hammer_vop_getattr,
        .vop_inactive =         hammer_vop_inactive,
        .vop_reclaim =          hammer_vop_reclaim,
        .vop_setattr =          hammer_vop_setattr
};

struct vop_ops hammer_fifo_vops = {
        .vop_default =          fifo_vnoperate,
        .vop_fsync =            hammer_vop_fsync,
        .vop_read =             hammer_vop_fiforead,
        .vop_write =            hammer_vop_fifowrite,
        .vop_access =           hammer_vop_access,
        .vop_close =            hammer_vop_fifoclose,
        .vop_getattr =          hammer_vop_getattr,
        .vop_inactive =         hammer_vop_inactive,
        .vop_reclaim =          hammer_vop_reclaim,
        .vop_setattr =          hammer_vop_setattr
};

static int hammer_dounlink(struct nchandle *nch, struct vnode *dvp,
                        struct ucred *cred, int flags);
static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
static int hammer_vop_strategy_write(struct vop_strategy_args *ap);

#if 0
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
        return (VOCALL(&hammer_vnode_vops, ap));
}
#endif

/*
 * hammer_vop_fsync { vp, waitfor }
 */
static
int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
        hammer_inode_t ip;
        int error;

        ip = VTOI(ap->a_vp);
        error = hammer_sync_inode(ip, ap->a_waitfor, 0);
        return (error);
}

/*
 * hammer_vop_read { vp, uio, ioflag, cred }
 */
static
int
hammer_vop_read(struct vop_read_args *ap)
{
        struct hammer_transaction trans;
        hammer_inode_t ip;
        off_t offset;
        struct buf *bp;
        struct uio *uio;
        int error;
        int n;
        int seqcount;

        if (ap->a_vp->v_type != VREG)
                return (EINVAL);
        ip = VTOI(ap->a_vp);
        error = 0;
        seqcount = ap->a_ioflag >> 16;

        hammer_start_transaction(&trans, ip->hmp);

        /*
         * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
         */
        uio = ap->a_uio;
        while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
                offset = uio->uio_offset & HAMMER_BUFMASK;
#if 0
                error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
                                     uio->uio_offset - offset, HAMMER_BUFSIZE,
                                     MAXBSIZE, seqcount, &bp);
#endif
                error = bread(ap->a_vp, uio->uio_offset - offset,
                              HAMMER_BUFSIZE, &bp);
                if (error) {
                        brelse(bp);
                        break;
                }
                /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (n > ip->ino_rec.ino_size - uio->uio_offset)
                        n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
                error = uiomove((char *)bp->b_data + offset, n, uio);
                if (error) {
                        bqrelse(bp);
                        break;
                }
                if ((ip->flags & HAMMER_INODE_RO) == 0) {
                        ip->ino_rec.ino_atime = trans.tid;
                        hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
                }
                bqrelse(bp);
        }
        hammer_commit_transaction(&trans);
        return (error);
}

/*
 * hammer_vop_write { vp, uio, ioflag, cred }
 */
static
int
hammer_vop_write(struct vop_write_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *ip;
        struct uio *uio;
        off_t offset;
        struct buf *bp;
        int error;
        int n;
        int flags;

        if (ap->a_vp->v_type != VREG)
                return (EINVAL);
        ip = VTOI(ap->a_vp);
        error = 0;

        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, ip->hmp);
        uio = ap->a_uio;

        /*
         * Check append mode
         */
        if (ap->a_ioflag & IO_APPEND)
                uio->uio_offset = ip->ino_rec.ino_size;

        /*
         * Check for illegal write offsets.  Valid range is 0...2^63-1
         */
        if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
                hammer_commit_transaction(&trans);
                return (EFBIG);
        }

        /*
         * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
         */
        while (uio->uio_resid > 0) {
                int fixsize = 0;

                offset = uio->uio_offset & HAMMER_BUFMASK;
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (uio->uio_offset + n > ip->ino_rec.ino_size) {
                        vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
                        fixsize = 1;
                }

                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Issuing a write with the same data backing the
                         * buffer.  Instantiate the buffer to collect the
                         * backing vm pages, then read-in any missing bits.
                         *
                         * This case is used by vop_stdputpages().
                         */
                        bp = getblk(ap->a_vp, uio->uio_offset - offset,
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0) {
                                bqrelse(bp);
                                error = bread(ap->a_vp,
                                              uio->uio_offset - offset,
                                              HAMMER_BUFSIZE, &bp);
                        }
                } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
                        /*
                         * entirely overwrite the buffer
                         */
                        bp = getblk(ap->a_vp, uio->uio_offset - offset,
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) {
                        /*
                         * XXX
                         */
                        bp = getblk(ap->a_vp, uio->uio_offset - offset,
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                        vfs_bio_clrbuf(bp);
                } else {
                        /*
                         * Partial overwrite, read in any missing bits then
                         * replace the portion being written.
                         */
                        error = bread(ap->a_vp, uio->uio_offset - offset,
                                      HAMMER_BUFSIZE, &bp);
                        if (error == 0)
                                bheavy(bp);
                }
                if (error == 0)
                        error = uiomove((char *)bp->b_data + offset, n, uio);

                /*
                 * If we screwed up we have to undo any VM size changes we
                 * made.
                 */
                if (error) {
                        brelse(bp);
                        if (fixsize) {
                                vtruncbuf(ap->a_vp, ip->ino_rec.ino_size,
                                          HAMMER_BUFSIZE);
                        }
                        break;
                }
                /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                if (ip->ino_rec.ino_size < uio->uio_offset) {
                        ip->ino_rec.ino_size = uio->uio_offset;
                        flags = HAMMER_INODE_RDIRTY;
                        vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
                } else {
                        flags = 0;
                }
                ip->ino_rec.ino_mtime = trans.tid;
                flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
                hammer_modify_inode(&trans, ip, flags);

                /*
                 * The file write must be tagged with the same TID as the
                 * inode, for consistency in case the inode changed size.
                 * This guarantees the on-disk data records will have a
                 * TID <= the inode TID representing the size change.
                 *
                 * If a prior write has not yet flushed, retain its TID.
                 */
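                /*
                 * e.g. a buffer dirtied twice before being flushed keeps
                 * the TID of the first dirtying write (b_tid is only
                 * assigned below while it is still zero), so its data
                 * records sort at or before the matching inode update.
                 */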
                if (bp->b_tid == 0)
                        bp->b_tid = ip->last_tid;

                if (ap->a_ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if (ap->a_ioflag & IO_DIRECT) {
                        bawrite(bp);
                } else {
                        bdwrite(bp);
                }
        }
        if (error)
                hammer_abort_transaction(&trans);
        else
                hammer_commit_transaction(&trans);
        return (error);
}

/*
 * hammer_vop_access { vp, mode, cred }
 */
static
int
hammer_vop_access(struct vop_access_args *ap)
{
        struct hammer_inode *ip = VTOI(ap->a_vp);
        uid_t uid;
        gid_t gid;
        int error;

        uid = hammer_to_unix_xid(&ip->ino_data.uid);
        gid = hammer_to_unix_xid(&ip->ino_data.gid);

        error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
                                  ip->ino_data.uflags);
        return (error);
}

/*
 * hammer_vop_advlock { vp, id, op, fl, flags }
 */
static
int
hammer_vop_advlock(struct vop_advlock_args *ap)
{
        struct hammer_inode *ip = VTOI(ap->a_vp);

        return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
}

/*
 * hammer_vop_close { vp, fflag }
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
        return (vop_stdclose(ap));
}

/*
 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_ncreate(struct vop_ncreate_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;
        struct hammer_inode *nip;
        struct nchandle *nch;
        int error;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Create a new filesystem object of the requested type.  The
         * returned inode will be referenced but not locked.
         */
        error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
        if (error) {
                kprintf("hammer_create_inode error %d\n", error);
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
                return (error);
        }

        /*
         * Add the new filesystem object to the directory.  This will also
         * bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
        if (error)
                kprintf("hammer_ip_add_directory error %d\n", error);

        /*
         * Finish up.
         */
        if (error) {
                hammer_rel_inode(nip, 0);
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
        } else {
                hammer_commit_transaction(&trans);
                error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
                        cache_setvp(ap->a_nch, *ap->a_vpp);
                }
        }
        return (error);
}

/*
 * hammer_vop_getattr { vp, vap }
 */
static
int
hammer_vop_getattr(struct vop_getattr_args *ap)
{
        struct hammer_inode *ip = VTOI(ap->a_vp);
        struct vattr *vap = ap->a_vap;

#if 0
        if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
            (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
            ip->obj_asof == XXX
        ) {
                /* LAZYMOD XXX */
        }
        hammer_itimes(ap->a_vp);
#endif

        vap->va_fsid = ip->hmp->fsid_udev;
        vap->va_fileid = ip->ino_rec.base.base.obj_id;
        vap->va_mode = ip->ino_data.mode;
        vap->va_nlink = ip->ino_rec.ino_nlinks;
        vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
        vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
        vap->va_size = ip->ino_rec.ino_size;
        hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
        hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
        hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
        vap->va_flags = ip->ino_data.uflags;
        vap->va_gen = 1;        /* hammer inums are unique for all time */
        vap->va_blocksize = 32768; /* XXX - extract from root volume */
        vap->va_bytes = ip->ino_rec.ino_size;
        vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
        vap->va_filerev = 0;    /* XXX */
        /* mtime uniquely identifies any adjustments made to the file */
        vap->va_fsmid = ip->ino_rec.ino_mtime;
        vap->va_uid_uuid = ip->ino_data.uid;
        vap->va_gid_uuid = ip->ino_data.gid;
        vap->va_fsid_uuid = ip->hmp->fsid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;

        switch (ip->ino_rec.base.base.obj_type) {
        case HAMMER_OBJTYPE_CDEV:
        case HAMMER_OBJTYPE_BDEV:
                vap->va_rmajor = ip->ino_data.rmajor;
                vap->va_rminor = ip->ino_data.rminor;
                break;
        default:
                break;
        }

        return (0);
}

/*
 * hammer_vop_nresolve { nch, dvp, cred }
 *
 * Locate the requested directory entry.
 */
static
int
hammer_vop_nresolve(struct vop_nresolve_args *ap)
{
        struct namecache *ncp;
        hammer_inode_t dip;
        hammer_inode_t ip;
        hammer_tid_t asof;
        struct hammer_cursor cursor;
        union hammer_record_ondisk *rec;
        struct vnode *vp;
        int64_t namekey;
        int error;
        int i;
        int nlen;
        int flags;
        u_int64_t obj_id;

        /*
         * Misc initialization, plus handle as-of name extensions.  Look for
         * the '@@' extension.  Note that as-of files and directories cannot
         * be modified.
         */
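        /*
         * e.g. a lookup of "foo@@0x10020304" resolves "foo" as of
         * transaction id 0x10020304 and forces the result read-only,
         * while a bare "@@<tid>" (empty name component) applies the
         * as-of to the directory itself.
         */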
        dip = VTOI(ap->a_dvp);
        ncp = ap->a_nch->ncp;
        asof = dip->obj_asof;
        nlen = ncp->nc_nlen;
        flags = dip->flags;

        for (i = 0; i < nlen; ++i) {
                if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
                        asof = hammer_str_to_tid(ncp->nc_name + i + 2);
                        flags |= HAMMER_INODE_RO;
                        break;
                }
        }
        nlen = i;

        /*
         * If there is no path component the time extension is relative to
         * dip.
         */
        if (nlen == 0) {
                ip = hammer_get_inode(dip->hmp, &dip->cache[1], dip->obj_id,
                                      asof, flags, &error);
                if (error == 0) {
                        error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
                        hammer_rel_inode(ip, 0);
                } else {
                        vp = NULL;
                }
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                        vrele(vp);
                }
                return (error);
        }

        /*
         * Calculate the namekey and setup the key range for the scan.  This
         * works kinda like a chained hash table where the lower 32 bits
         * of the namekey synthesize the chain.
         *
         * The key range is inclusive of both key_beg and key_end.
         */
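        /*
         * Roughly: the upper bits of the namekey hold the name hash and
         * the low 32 bits enumerate collisions, so the scan range
         * namekey...(namekey | 0xFFFFFFFF) covers the whole chain.
         */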
        namekey = hammer_directory_namekey(ncp->nc_name, nlen);

        error = hammer_init_cursor_hmp(&cursor, &dip->cache[0], dip->hmp);
        cursor.key_beg.obj_id = dip->obj_id;
        cursor.key_beg.key = namekey;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;

        cursor.key_end = cursor.key_beg;
        cursor.key_end.key |= 0xFFFFFFFFULL;
        cursor.asof = asof;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

        /*
         * Scan all matching records (the chain), locate the one matching
         * the requested path component.
         *
         * The hammer_ip_*() functions merge in-memory records with on-disk
         * records for the purposes of the search.
         */
        if (error == 0)
                error = hammer_ip_first(&cursor, dip);

        rec = NULL;
        obj_id = 0;

        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
                if (nlen == rec->entry.base.data_len &&
                    bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
                        obj_id = rec->entry.obj_id;
                        break;
                }
                error = hammer_ip_next(&cursor);
        }
        hammer_done_cursor(&cursor);
        if (error == 0) {
                ip = hammer_get_inode(dip->hmp, &dip->cache[1],
                                      obj_id, asof, flags, &error);
                if (error == 0) {
                        error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
                        hammer_rel_inode(ip, 0);
                } else {
                        vp = NULL;
                }
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                        vrele(vp);
                }
        } else if (error == ENOENT) {
                cache_setvp(ap->a_nch, NULL);
        }
        return (error);
}

/*
 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Locate the parent directory of a directory vnode.
 *
 * dvp is referenced but not locked.  *vpp must be returned referenced and
 * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
 * at the root, instead it could indicate that the directory we were in was
 * removed.
 */
static
int
hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
        struct hammer_inode *dip;
        struct hammer_inode *ip;
        u_int64_t parent_obj_id;
        int error;

        dip = VTOI(ap->a_dvp);
        if ((parent_obj_id = dip->ino_data.parent_obj_id) == 0) {
                *ap->a_vpp = NULL;
                return (ENOENT);
        }

        ip = hammer_get_inode(dip->hmp, &dip->cache[1], parent_obj_id,
                              dip->obj_asof, dip->flags, &error);
        if (ip == NULL) {
                *ap->a_vpp = NULL;
                return (error);
        }
        error = hammer_get_vnode(ip, LK_EXCLUSIVE, ap->a_vpp);
        hammer_rel_inode(ip, 0);
        return (error);
}

/*
 * hammer_vop_nlink { nch, dvp, vp, cred }
 */
static
int
hammer_vop_nlink(struct vop_nlink_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;
        struct hammer_inode *ip;
        struct nchandle *nch;
        int error;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);
        ip = VTOI(ap->a_vp);

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);
        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Add the filesystem object to the directory.  Note that neither
         * dip nor ip are referenced or locked, but their vnodes are
         * referenced.  This function will bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);

        /*
         * Finish up.
         */
        if (error) {
                hammer_abort_transaction(&trans);
        } else {
                cache_setunresolved(nch);
                cache_setvp(nch, ap->a_vp);
                hammer_commit_transaction(&trans);
        }
        return (error);
}

/*
 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;
        struct hammer_inode *nip;
        struct nchandle *nch;
        int error;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Create a new filesystem object of the requested type.  The
         * returned inode will be referenced but not locked.
         */
        error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
        if (error) {
                kprintf("hammer_mkdir error %d\n", error);
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
                return (error);
        }

        /*
         * Add the new filesystem object to the directory.  This will also
         * bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
        if (error)
                kprintf("hammer_mkdir (add) error %d\n", error);

        /*
         * Finish up.
         */
        if (error) {
                hammer_rel_inode(nip, 0);
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
        } else {
                hammer_commit_transaction(&trans);
                error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
                        cache_setvp(ap->a_nch, *ap->a_vpp);
                }
        }
        return (error);
}

/*
 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmknod(struct vop_nmknod_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;
        struct hammer_inode *nip;
        struct nchandle *nch;
        int error;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Create a new filesystem object of the requested type.  The
         * returned inode will be referenced but not locked.
         */
        error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
        if (error) {
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
                return (error);
        }

        /*
         * Add the new filesystem object to the directory.  This will also
         * bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

        /*
         * Finish up.
         */
        if (error) {
                hammer_rel_inode(nip, 0);
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
        } else {
                hammer_commit_transaction(&trans);
                error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
                        cache_setvp(ap->a_nch, *ap->a_vpp);
                }
        }
        return (error);
}

/*
 * hammer_vop_open { vp, mode, cred, fp }
 */
static
int
hammer_vop_open(struct vop_open_args *ap)
{
        if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
                return (EROFS);

        return (vop_stdopen(ap));
}

/*
 * hammer_vop_pathconf { vp, name, retval }
 */
static
int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
        return (EOPNOTSUPP);
}

/*
 * hammer_vop_print { vp }
 */
static
int
hammer_vop_print(struct vop_print_args *ap)
{
        return (EOPNOTSUPP);
}

/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
 */
static
int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
        struct hammer_cursor cursor;
        struct hammer_inode *ip;
        struct uio *uio;
        hammer_record_ondisk_t rec;
        hammer_base_elm_t base;
        int error;
        int cookie_index;
        int ncookies;
        off_t *cookies;
        off_t saveoff;
        int r;

        ip = VTOI(ap->a_vp);
        uio = ap->a_uio;
        saveoff = uio->uio_offset;

        if (ap->a_ncookies) {
                ncookies = uio->uio_resid / 16 + 1;
                if (ncookies > 1024)
                        ncookies = 1024;
                cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
                cookie_index = 0;
        } else {
                ncookies = -1;
                cookies = NULL;
                cookie_index = 0;
        }

        /*
         * Handle artificial entries
         */
        error = 0;
        if (saveoff == 0) {
                r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }
        if (saveoff == 1) {
                if (ip->ino_data.parent_obj_id) {
                        r = vop_write_dirent(&error, uio,
                                             ip->ino_data.parent_obj_id,
                                             DT_DIR, 2, "..");
                } else {
                        r = vop_write_dirent(&error, uio,
                                             ip->obj_id, DT_DIR, 2, "..");
                }
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        /*
         * Key range (begin and end inclusive) to scan.  Directory keys
         * directly translate to a 64 bit 'seek' position.
         */
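        /*
         * e.g. the cookie stored for a real entry is that entry's record
         * key, and the uio offset saved on return is last-key + 1, so a
         * resumed readdir simply restarts the scan at key_beg.key = saveoff.
         */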
        hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.key = saveoff;

        cursor.key_end = cursor.key_beg;
        cursor.key_end.key = HAMMER_MAX_KEY;
        cursor.asof = ip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

        error = hammer_ip_first(&cursor, ip);

        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
                base = &rec->base.base;
                saveoff = base->key;

                if (base->obj_id != ip->obj_id)
                        panic("readdir: bad record at %p", cursor.node);

                r = vop_write_dirent(
                        &error, uio, rec->entry.obj_id,
                        hammer_get_dtype(rec->entry.base.base.obj_type),
                        rec->entry.base.data_len,
                        (void *)cursor.data);
                if (r)
                        break;
                ++saveoff;
                if (cookies)
                        cookies[cookie_index] = base->key;
                ++cookie_index;
                if (cookie_index == ncookies)
                        break;
                error = hammer_ip_next(&cursor);
        }
        hammer_done_cursor(&cursor);

done:
        if (ap->a_eofflag)
                *ap->a_eofflag = (error == ENOENT);
        uio->uio_offset = saveoff;
        if (error && cookie_index == 0) {
                if (error == ENOENT)
                        error = 0;
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
                        *ap->a_cookies = NULL;
                }
        } else {
                if (error == ENOENT)
                        error = 0;
                if (cookies) {
                        *ap->a_ncookies = cookie_index;
                        *ap->a_cookies = cookies;
                }
        }
        return (error);
}

/*
 * hammer_vop_readlink { vp, uio, cred }
 */
static
int
hammer_vop_readlink(struct vop_readlink_args *ap)
{
        struct hammer_cursor cursor;
        struct hammer_inode *ip;
        int error;

        ip = VTOI(ap->a_vp);
        hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);

        /*
         * The symlink data is stored as a fixed record
         * (HAMMER_RECTYPE_FIX) keyed by HAMMER_FIXKEY_SYMLINK, so a
         * direct lookup suffices; no range scan is needed.
         */
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
        cursor.asof = ip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_ASOF;

        error = hammer_ip_lookup(&cursor, ip);
        if (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error == 0) {
                        error = uiomove((char *)cursor.data,
                                        cursor.record->generic.base.data_len,
                                        ap->a_uio);
                }
        }
        hammer_done_cursor(&cursor);
        return (error);
}

/*
 * hammer_vop_nremove { nch, dvp, cred }
 */
static
int
hammer_vop_nremove(struct vop_nremove_args *ap)
{
        return (hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
}

/*
 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer_vop_nrename(struct vop_nrename_args *ap)
{
        struct hammer_transaction trans;
        struct namecache *fncp;
        struct namecache *tncp;
        struct hammer_inode *fdip;
        struct hammer_inode *tdip;
        struct hammer_inode *ip;
        struct hammer_cursor cursor;
        union hammer_record_ondisk *rec;
        int64_t namekey;
        int error;

        fdip = VTOI(ap->a_fdvp);
        tdip = VTOI(ap->a_tdvp);
        fncp = ap->a_fnch->ncp;
        tncp = ap->a_tnch->ncp;
        ip = VTOI(fncp->nc_vp);
        KKASSERT(ip != NULL);

        if (fdip->flags & HAMMER_INODE_RO)
                return (EROFS);
        if (tdip->flags & HAMMER_INODE_RO)
                return (EROFS);
        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        hammer_start_transaction(&trans, fdip->hmp);

        /*
         * Remove tncp from the target directory and then link ip as
         * tncp. XXX pass trans to dounlink
         */
        error = hammer_dounlink(ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
        if (error == 0 || error == ENOENT)
                error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
        if (error)
                goto failed; /* XXX */

        /*
         * Locate the record in the originating directory and remove it.
         *
         * Calculate the namekey and setup the key range for the scan.  This
         * works kinda like a chained hash table where the lower 32 bits
         * of the namekey synthesize the chain.
         *
         * The key range is inclusive of both key_beg and key_end.
         */
        namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
retry:
        hammer_init_cursor_hmp(&cursor, &fdip->cache[0], fdip->hmp);
        cursor.key_beg.obj_id = fdip->obj_id;
        cursor.key_beg.key = namekey;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;

        cursor.key_end = cursor.key_beg;
        cursor.key_end.key |= 0xFFFFFFFFULL;
        cursor.asof = fdip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

        /*
         * Scan all matching records (the chain), locate the one matching
         * the requested path component.
         *
         * The hammer_ip_*() functions merge in-memory records with on-disk
         * records for the purposes of the search.
         */
        error = hammer_ip_first(&cursor, fdip);
        while (error == 0) {
                if (hammer_ip_resolve_data(&cursor) != 0)
                        break;
                rec = cursor.record;
                if (fncp->nc_nlen == rec->entry.base.data_len &&
                    bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);
        }

        /*
         * If all is ok we have to get the inode so we can adjust nlinks.
         *
         * WARNING: hammer_ip_del_directory() may have to terminate the
         * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
         * twice.
         */
        if (error == 0)
                error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
        hammer_done_cursor(&cursor);
        if (error == 0)
                cache_rename(ap->a_fnch, ap->a_tnch);
        if (error == EDEADLK)
                goto retry;
failed:
        if (error == 0) {
                hammer_commit_transaction(&trans);
        } else {
                hammer_abort_transaction(&trans);
        }
        return (error);
}

/*
 * hammer_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
{
        return (hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
}

/*
 * hammer_vop_setattr { vp, vap, cred }
 */
static
int
hammer_vop_setattr(struct vop_setattr_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_cursor *spike = NULL;
        struct vattr *vap;
        struct hammer_inode *ip;
        int modflags;
        int error;
        int truncating;
        int64_t aligned_size;
        u_int32_t flags;
        uuid_t uuid;

        vap = ap->a_vap;
        ip = ap->a_vp->v_data;
        modflags = 0;

        if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
                return (EROFS);
        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        hammer_start_transaction(&trans, ip->hmp);
        error = 0;

        if (vap->va_flags != VNOVAL) {
                flags = ip->ino_data.uflags;
                error = vop_helper_setattr_flags(&flags, vap->va_flags,
                                    hammer_to_unix_xid(&ip->ino_data.uid),
                                    ap->a_cred);
                if (error == 0) {
                        if (ip->ino_data.uflags != flags) {
                                ip->ino_data.uflags = flags;
                                modflags |= HAMMER_INODE_DDIRTY;
                        }
                        if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
                                error = 0;
                                goto done;
                        }
                }
                goto done;
        }
        if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto done;
        }
        if (vap->va_uid != (uid_t)VNOVAL) {
                hammer_guid_to_uuid(&uuid, vap->va_uid);
                if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) {
                        ip->ino_data.uid = uuid;
                        modflags |= HAMMER_INODE_DDIRTY;
                }
        }
        if (vap->va_gid != (uid_t)VNOVAL) {
                hammer_guid_to_uuid(&uuid, vap->va_gid);
                if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) {
                        ip->ino_data.gid = uuid;
                        modflags |= HAMMER_INODE_DDIRTY;
                }
        }
        while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
                switch (ap->a_vp->v_type) {
                case VREG:
                        if (vap->va_size == ip->ino_rec.ino_size)
                                break;
                        if (vap->va_size < ip->ino_rec.ino_size) {
                                vtruncbuf(ap->a_vp, vap->va_size,
                                          HAMMER_BUFSIZE);
                                truncating = 1;
                        } else {
                                vnode_pager_setsize(ap->a_vp, vap->va_size);
                                truncating = 0;
                        }
                        ip->ino_rec.ino_size = vap->va_size;
                        modflags |= HAMMER_INODE_RDIRTY;
                        aligned_size = (vap->va_size + HAMMER_BUFMASK) &
                                       ~(int64_t)HAMMER_BUFMASK;

                        if (truncating) {
                                error = hammer_ip_delete_range(&trans, ip,
                                                    aligned_size,
                                                    0x7FFFFFFFFFFFFFFFLL,
                                                    &spike);
                        }
                        /*
                         * If truncating we have to clean out a portion of
                         * the last block on-disk.
                         */
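                        /*
                         * e.g. with 16K buffers, truncating to size 20000
                         * keeps the buffer at offset 16384 and zeroes its
                         * bytes 3616..16383 (file bytes 20000..32767), so
                         * stale data past the new EOF never leaks back.
                         */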
                        if (truncating && error == 0 &&
                            vap->va_size < aligned_size) {
                                struct buf *bp;
                                int offset;

                                offset = vap->va_size & HAMMER_BUFMASK;
                                error = bread(ap->a_vp,
                                              aligned_size - HAMMER_BUFSIZE,
                                              HAMMER_BUFSIZE, &bp);
                                if (error == 0) {
                                        bzero(bp->b_data + offset,
                                              HAMMER_BUFSIZE - offset);
                                        bdwrite(bp);
                                } else {
                                        brelse(bp);
                                }
                        }
                        break;
                case VDATABASE:
                        error = hammer_ip_delete_range(&trans, ip,
                                                    vap->va_size,
                                                    0x7FFFFFFFFFFFFFFFLL,
                                                    &spike);
                        ip->ino_rec.ino_size = vap->va_size;
                        modflags |= HAMMER_INODE_RDIRTY;
                        break;
                default:
                        error = EINVAL;
                        goto done;
                }
                if (error == ENOSPC) {
                        error = hammer_spike(&spike);
                        if (error == 0)
                                continue;
                }
                KKASSERT(spike == NULL);
                break;
        }
        if (vap->va_atime.tv_sec != VNOVAL) {
                ip->ino_rec.ino_atime =
                        hammer_timespec_to_transid(&vap->va_atime);
                modflags |= HAMMER_INODE_ITIMES;
        }
        if (vap->va_mtime.tv_sec != VNOVAL) {
                ip->ino_rec.ino_mtime =
                        hammer_timespec_to_transid(&vap->va_mtime);
                modflags |= HAMMER_INODE_ITIMES;
        }
        if (vap->va_mode != (mode_t)VNOVAL) {
                if (ip->ino_data.mode != vap->va_mode) {
                        ip->ino_data.mode = vap->va_mode;
                        modflags |= HAMMER_INODE_DDIRTY;
                }
        }
done:
        if (error) {
                hammer_abort_transaction(&trans);
        } else {
                hammer_modify_inode(&trans, ip, modflags);
                hammer_commit_transaction(&trans);
        }
        return (error);
}

/*
 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_inode *dip;
        struct hammer_inode *nip;
        struct nchandle *nch;
        hammer_record_t record;
        int error;
        int bytes;

        ap->a_vap->va_type = VLNK;

        nch = ap->a_nch;
        dip = VTOI(ap->a_dvp);

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Create a transaction to cover the operations we perform.
         */
        hammer_start_transaction(&trans, dip->hmp);

        /*
         * Create a new filesystem object of the requested type.  The
         * returned inode will be referenced but not locked.
         */
        error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
        if (error) {
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
                return (error);
        }

        /*
         * Add the new filesystem object to the directory.  This will also
         * bump the inode's link count.
         */
        error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

        /*
         * Add a record representing the symlink.  The symlink is stored
         * as pure data, not a string, and is not \0-terminated.
         */
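        /*
         * e.g. a short target that fits in the record's inline filler
         * area is copied directly into the record below, while a longer
         * target is referenced externally and reallocated by
         * hammer_ip_add_record().
         */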
        if (error == 0) {
                record = hammer_alloc_mem_record(nip);
                bytes = strlen(ap->a_target);

                record->rec.generic.base.base.key = HAMMER_FIXKEY_SYMLINK;
                record->rec.generic.base.base.rec_type = HAMMER_RECTYPE_FIX;
                record->rec.generic.base.data_len = bytes;
                if (bytes <= sizeof(record->rec.generic.filler)) {
                        record->data = (void *)record->rec.generic.filler;
                        bcopy(ap->a_target, record->data, bytes);
                } else {
                        record->data = (void *)ap->a_target;
                        /* will be reallocated by routine below */
                }
                error = hammer_ip_add_record(&trans, record);
        }

        /*
         * Finish up.
         */
        if (error) {
                hammer_rel_inode(nip, 0);
                hammer_abort_transaction(&trans);
                *ap->a_vpp = NULL;
        } else {
                hammer_commit_transaction(&trans);
                error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
                hammer_rel_inode(nip, 0);
                if (error == 0) {
                        cache_setunresolved(ap->a_nch);
                        cache_setvp(ap->a_nch, *ap->a_vpp);
                }
        }
        return (error);
}

/*
 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
 */
static
int
hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
{
        return (hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, ap->a_flags));
}

/*
 * hammer_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer_vop_ioctl(struct vop_ioctl_args *ap)
{
        struct hammer_inode *ip = ap->a_vp->v_data;

        return (hammer_ioctl(ip, ap->a_command, ap->a_data,
                             ap->a_fflag, ap->a_cred));
}

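/*
 * hammer_vop_mountctl { op, ctl, ctllen }
 *
 * MOUNTCTL_SET_EXPORT passes an export_args structure through to
 * hammer_vfs_export() to set the NFS export controls for this mount;
 * all other operations fall through to journal_mountctl().
 */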
static
int
hammer_vop_mountctl(struct vop_mountctl_args *ap)
{
        struct mount *mp;
        int error;

        mp = ap->a_head.a_ops->head.vv_mount;

        switch (ap->a_op) {
        case MOUNTCTL_SET_EXPORT:
                if (ap->a_ctllen != sizeof(struct export_args))
                        error = EINVAL;
                else
                        error = hammer_vfs_export(mp, ap->a_op,
                                    (const struct export_args *)ap->a_ctl);
                break;
        default:
                error = journal_mountctl(ap);
                break;
        }
        return (error);
}

/*
 * hammer_vop_strategy { vp, bio }
 *
 * Strategy call, used for regular file read & write only.  Note that the
 * bp may represent a cluster.
 *
 * To simplify operation and allow better optimizations in the future,
 * this code does not make any assumptions with regards to buffer alignment
 * or size.
 */
static
int
hammer_vop_strategy(struct vop_strategy_args *ap)
{
        struct buf *bp;
        int error;

        bp = ap->a_bio->bio_buf;

        switch (bp->b_cmd) {
        case BUF_CMD_READ:
                error = hammer_vop_strategy_read(ap);
                break;
        case BUF_CMD_WRITE:
                error = hammer_vop_strategy_write(ap);
                break;
        default:
                error = EINVAL;
                break;
        }
        bp->b_error = error;
        if (error)
                bp->b_flags |= B_ERROR;
        biodone(ap->a_bio);
        return (error);
}

/*
 * Read from a regular file.  Iterate the related records and fill in the
 * BIO/BUF.  Gaps are zero-filled.
 *
 * The support code in hammer_object.c should be used to deal with mixed
 * in-memory and on-disk records.
 *
 * XXX atime update
 */
static
int
hammer_vop_strategy_read(struct vop_strategy_args *ap)
{
        struct hammer_inode *ip = ap->a_vp->v_data;
        struct hammer_cursor cursor;
        hammer_record_ondisk_t rec;
        hammer_base_elm_t base;
        struct bio *bio;
        struct buf *bp;
        int64_t rec_offset;
        int64_t ran_end;
        int64_t tmp64;
        int error;
        int boff;
        int roff;
        int n;

        bio = ap->a_bio;
        bp = bio->bio_buf;

        hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);

        /*
         * Key range (begin and end inclusive) to scan.  Note that the keys
         * stored in the actual records represent BASE+LEN, not BASE.  The
         * first record containing bio_offset will have a key > bio_offset.
         */
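        /*
         * e.g. a data record covering file bytes [0, 16384) carries key
         * 16384 (BASE+LEN), so starting the scan at bio_offset + 1 finds
         * the first record that overlaps the requested range.
         */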
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.key = bio->bio_offset + 1;
        cursor.asof = ip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_ASOF;

        cursor.key_end = cursor.key_beg;
        if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
                cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
        } else {
                ran_end = bio->bio_offset + bp->b_bufsize;
                cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
                cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
                tmp64 = ran_end + MAXPHYS + 1;  /* work-around GCC-4 bug */
                if (tmp64 < ran_end)
                        cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
                else
                        cursor.key_end.key = ran_end + MAXPHYS + 1;
        }
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

        error = hammer_ip_first(&cursor, ip);
        boff = 0;

        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
                base = &rec->base.base;

                rec_offset = base->key - rec->data.base.data_len;

                /*
                 * Calculate the gap, if any, and zero-fill it.
                 */
                n = (int)(rec_offset - (bio->bio_offset + boff));
                if (n > 0) {
                        if (n > bp->b_bufsize - boff)
                                n = bp->b_bufsize - boff;
                        bzero((char *)bp->b_data + boff, n);
                        boff += n;
                        n = 0;
                }

                /*
                 * Calculate the data offset in the record and the number
                 * of bytes we can copy.
                 *
                 * Note there is a degenerate case here where boff may
                 * already be at bp->b_bufsize.
                 */
                roff = -n;
                n = rec->data.base.data_len - roff;
                KKASSERT(n > 0);
                if (n > bp->b_bufsize - boff)
                        n = bp->b_bufsize - boff;
                bcopy((char *)cursor.data + roff, (char *)bp->b_data + boff, n);
                boff += n;
                if (boff == bp->b_bufsize)
                        break;
                error = hammer_ip_next(&cursor);
        }
        hammer_done_cursor(&cursor);

        /*
         * There may have been a gap after the last record
         */
        if (error == ENOENT)
                error = 0;
        if (error == 0 && boff != bp->b_bufsize) {
                KKASSERT(boff < bp->b_bufsize);
                bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
                /* boff = bp->b_bufsize; */
        }
        bp->b_resid = 0;
        return (error);
}

/*
 * Write to a regular file.  Iterate the related records and mark for
 * deletion.  If existing edge records (left and right side) overlap our
 * write they have to be marked deleted and new records created, usually
 * referencing a portion of the original data.  Then add a record to
 * represent the buffer.
 *
 * The support code in hammer_object.c should be used to deal with mixed
 * in-memory and on-disk records.
 */
static
int
hammer_vop_strategy_write(struct vop_strategy_args *ap)
{
        struct hammer_transaction trans;
        struct hammer_cursor *spike = NULL;
        hammer_inode_t ip;
        struct bio *bio;
        struct buf *bp;
        int error;

        bio = ap->a_bio;
        bp = bio->bio_buf;
        ip = ap->a_vp->v_data;

        if (ip->flags & HAMMER_INODE_RO)
                return (EROFS);

        /*
         * Start a transaction using the TID stored with the bp.
         */
        KKASSERT(bp->b_tid != 0);
        hammer_start_transaction_tid(&trans, ip->hmp, bp->b_tid);

retry:
        /*
         * Delete any records overlapping our range.  This function will
         * (eventually) properly truncate partial overlaps.
         */
        if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
                error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
                                               bio->bio_offset, &spike);
        } else {
                error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
                                               bio->bio_offset +
                                                bp->b_bufsize - 1,
                                               &spike);
        }

        /*
         * Add a single record to cover the write
         */
        if (error == 0) {
                error = hammer_ip_sync_data(&trans, ip, bio->bio_offset,
                                            bp->b_data, bp->b_bufsize,
                                            &spike);
        }

        /*
         * If we ran out of space the spike structure will be filled in
         * and we must call hammer_spike with it, then retry.
         */
        if (error == ENOSPC) {
                error = hammer_spike(&spike);
                if (error == 0)
                        goto retry;
        }
        KKASSERT(spike == NULL);

        /*
         * If an error occurred abort the transaction
         */
        if (error) {
                /* XXX undo deletion */
                hammer_abort_transaction(&trans);
                bp->b_resid = bp->b_bufsize;
        } else {
                hammer_commit_transaction(&trans);
                bp->b_resid = 0;
                bp->b_tid = 0;
        }
        return (error);
}

/*
 * dounlink - disconnect a directory entry
 *
 * XXX whiteout support not really in yet
 */
static int
hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
                int flags)
{
        struct hammer_transaction trans;
        struct namecache *ncp;
        hammer_inode_t dip;
        hammer_inode_t ip;
        hammer_record_ondisk_t rec;
        struct hammer_cursor cursor;
        int64_t namekey;
        int error;

        /*
         * Calculate the namekey and setup the key range for the scan.  This
         * works kinda like a chained hash table where the lower 32 bits
         * of the namekey synthesize the chain.
         *
         * The key range is inclusive of both key_beg and key_end.
         */
        dip = VTOI(dvp);
        ncp = nch->ncp;

        if (dip->flags & HAMMER_INODE_RO)
                return (EROFS);

        hammer_start_transaction(&trans, dip->hmp);

        namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
retry:
        hammer_init_cursor_hmp(&cursor, &dip->cache[0], dip->hmp);
        cursor.key_beg.obj_id = dip->obj_id;
        cursor.key_beg.key = namekey;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
        cursor.key_beg.obj_type = 0;

        cursor.key_end = cursor.key_beg;
        cursor.key_end.key |= 0xFFFFFFFFULL;
        cursor.asof = dip->obj_asof;
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

        /*
         * Scan all matching records (the chain), locate the one matching
         * the requested path component.  info->last_error contains the
         * error code on search termination and could be 0, ENOENT, or
         * something else.
         *
         * The hammer_ip_*() functions merge in-memory records with on-disk
         * records for the purposes of the search.
         */
        error = hammer_ip_first(&cursor, dip);
        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
                if (ncp->nc_nlen == rec->entry.base.data_len &&
                    bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);
        }

        /*
         * If all is ok we have to get the inode so we can adjust nlinks.
         *
         * If the target is a directory, it must be empty.
         */
        if (error == 0) {
                ip = hammer_get_inode(dip->hmp, &dip->cache[1],
                                      rec->entry.obj_id,
                                      dip->hmp->asof, 0, &error);
                if (error == ENOENT) {
                        kprintf("obj_id %016llx\n", rec->entry.obj_id);
                        Debugger("ENOENT unlinking object that should exist, cont to sync");
                        hammer_sync_hmp(dip->hmp, MNT_NOWAIT);
                        Debugger("ENOENT - sync done");
                }
                if (error == 0 && ip->ino_rec.base.base.obj_type ==
                                  HAMMER_OBJTYPE_DIRECTORY) {
                        error = hammer_ip_check_directory_empty(&trans, ip);
                }

                /*
                 * WARNING: hammer_ip_del_directory() may have to terminate
                 * the cursor to avoid a lock recursion.  It's ok to call
                 * hammer_done_cursor() twice.
                 */
                if (error == 0)
                        error = hammer_ip_del_directory(&trans, &cursor, dip, ip);
                if (error == 0) {
                        cache_setunresolved(nch);
                        cache_setvp(nch, NULL);
                        /* XXX locking */
                        if (ip->vp)
                                cache_inval_vp(ip->vp, CINV_DESTROY);
                }
                hammer_rel_inode(ip, 0);
        }
        hammer_done_cursor(&cursor);
        if (error == EDEADLK)
                goto retry;

        if (error == 0)
                hammer_commit_transaction(&trans);
        else
                hammer_abort_transaction(&trans);
        return (error);
}

/************************************************************************
 *                          FIFO AND SPECFS OPS                         *
 ************************************************************************/

static int
hammer_vop_fifoclose (struct vop_close_args *ap)
{
        /* XXX update itimes */
        return (VOCALL(&fifo_vnode_vops, &ap->a_head));
}

static int
hammer_vop_fiforead (struct vop_read_args *ap)
{
        int error;

        error = VOCALL(&fifo_vnode_vops, &ap->a_head);
        /* XXX update access time */
        return (error);
}

static int
hammer_vop_fifowrite (struct vop_write_args *ap)
{
        int error;

        error = VOCALL(&fifo_vnode_vops, &ap->a_head);
        /* XXX update access time */
        return (error);
}

static int
hammer_vop_specclose (struct vop_close_args *ap)
{
        /* XXX update itimes */
        return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

static int
hammer_vop_specread (struct vop_read_args *ap)
{
        /* XXX update access time */
        return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

static int
hammer_vop_specwrite (struct vop_write_args *ap)
{
        /* XXX update last change time */
        return (VOCALL(&spec_vnode_vops, &ap->a_head));
}