hammer2 - Refactor frontend part 6/many
dragonfly.git: sys/vfs/hammer2/hammer2_strategy.c
1 /*
2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
37 * This module handles low level logical file I/O (strategy) which backs
38 * the logical buffer cache.
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/fcntl.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/namei.h>
48 #include <sys/mount.h>
49 #include <sys/vnode.h>
50 #include <sys/mountctl.h>
51 #include <sys/dirent.h>
52 #include <sys/uio.h>
53 #include <sys/objcache.h>
54 #include <sys/event.h>
55 #include <sys/file.h>
56 #include <vfs/fifofs/fifo.h>
58 #include "hammer2.h"
59 #include "hammer2_lz4.h"
61 #include "zlib/hammer2_zlib.h"
63 struct objcache *cache_buffer_read;
64 struct objcache *cache_buffer_write;
67 * Strategy code (async logical file buffer I/O from system)
69 * WARNING: The strategy code cannot safely use hammer2 transactions
70 * as this can deadlock against vfs_sync's vfsync() call
71 * if multiple flushes are queued. All H2 structures must
72 * already be present and ready for the DIO.
74 * Reads can be initiated asynchronously, writes have to be
75 * spooled to a separate thread for action to avoid deadlocks.
77 static int hammer2_strategy_read(struct vop_strategy_args *ap);
78 static int hammer2_strategy_write(struct vop_strategy_args *ap);
79 static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);
81 int
82 hammer2_vop_strategy(struct vop_strategy_args *ap)
84 struct bio *biop;
85 struct buf *bp;
86 int error;
88 biop = ap->a_bio;
89 bp = biop->bio_buf;
91 switch(bp->b_cmd) {
92 case BUF_CMD_READ:
93 error = hammer2_strategy_read(ap);
94 ++hammer2_iod_file_read;
95 break;
96 case BUF_CMD_WRITE:
97 error = hammer2_strategy_write(ap);
98 ++hammer2_iod_file_write;
99 break;
100 default:
101 bp->b_error = error = EINVAL;
102 bp->b_flags |= B_ERROR;
103 biodone(biop);
104 break;
106 return (error);
110 * Return the largest contiguous physical disk range for the logical
111 * request, in bytes.
113 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
115 * Basically disabled; the logical buffer write thread has to deal with
116 * buffers one-at-a-time.
119 hammer2_vop_bmap(struct vop_bmap_args *ap)
121 *ap->a_doffsetp = NOOFFSET;
122 if (ap->a_runp)
123 *ap->a_runp = 0;
124 if (ap->a_runb)
125 *ap->a_runb = 0;
126 return (EOPNOTSUPP);
129 /****************************************************************************
130 * READ SUPPORT *
131 ****************************************************************************/
133 * Callback used in the read path when a block is compressed with LZ4.
135 static
136 void
137 hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
139 struct buf *bp;
140 char *compressed_buffer;
141 int compressed_size;
142 int result;
144 bp = bio->bio_buf;
146 #if 0
147 if bio->bio_caller_info2.index &&
148 bio->bio_caller_info1.uvalue32 !=
149 crc32(bp->b_data, bp->b_bufsize) --- return error
150 #endif
152 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
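/*
 * On-media LZ4 blocks are prefixed with an int holding the compressed
 * length; the compressed payload follows immediately after the prefix.
 */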
153 compressed_size = *(const int *)data;
154 KKASSERT(compressed_size <= bytes - sizeof(int));
156 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
157 result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
158 compressed_buffer,
159 compressed_size,
160 bp->b_bufsize);
161 if (result < 0) {
162 kprintf("READ PATH: Error during decompression."
163 "bio %016jx/%d\n",
164 (intmax_t)bio->bio_offset, bytes);
165 /* make sure it isn't random garbage */
166 bzero(compressed_buffer, bp->b_bufsize);
168 KKASSERT(result <= bp->b_bufsize);
169 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
170 if (result < bp->b_bufsize)
171 bzero(bp->b_data + result, bp->b_bufsize - result);
172 objcache_put(cache_buffer_read, compressed_buffer);
173 bp->b_resid = 0;
174 bp->b_flags |= B_AGE;
178 * Callback used in the read path when a block is compressed with ZLIB.
179 * It is almost identical to the LZ4 callback, so in theory they could be
180 * unified, but that would require changes to the bio structure.
182 static
183 void
184 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
186 struct buf *bp;
187 char *compressed_buffer;
188 z_stream strm_decompress;
189 int result;
190 int ret;
192 bp = bio->bio_buf;
194 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
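/*
 * One-shot zlib decompression: the entire compressed block is presented
 * as a single input buffer and inflated with Z_FINISH into a scratch
 * buffer sized to the logical buffer.
 */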
195 strm_decompress.avail_in = 0;
196 strm_decompress.next_in = Z_NULL;
198 ret = inflateInit(&strm_decompress);
200 if (ret != Z_OK)
201 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
203 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
204 strm_decompress.next_in = __DECONST(char *, data);
206 /* XXX supply proper size, subset of device bp */
207 strm_decompress.avail_in = bytes;
208 strm_decompress.next_out = compressed_buffer;
209 strm_decompress.avail_out = bp->b_bufsize;
211 ret = inflate(&strm_decompress, Z_FINISH);
212 if (ret != Z_STREAM_END) {
213 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
214 bzero(compressed_buffer, bp->b_bufsize);
216 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
217 result = bp->b_bufsize - strm_decompress.avail_out;
218 if (result < bp->b_bufsize)
219 bzero(bp->b_data + result, strm_decompress.avail_out);
220 objcache_put(cache_buffer_read, compressed_buffer);
221 ret = inflateEnd(&strm_decompress);
223 bp->b_resid = 0;
224 bp->b_flags |= B_AGE;
228 * Logical buffer I/O, async read.
230 static
232 hammer2_strategy_read(struct vop_strategy_args *ap)
234 struct buf *bp;
235 struct bio *bio;
236 struct bio *nbio;
237 hammer2_inode_t *ip;
238 hammer2_cluster_t *cparent;
239 hammer2_cluster_t *cluster;
240 hammer2_key_t key_dummy;
241 hammer2_key_t lbase;
242 uint8_t btype;
244 bio = ap->a_bio;
245 bp = bio->bio_buf;
246 ip = VTOI(ap->a_vp);
247 nbio = push_bio(bio);
249 lbase = bio->bio_offset;
250 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
253 * Lookup the file offset.
255 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
256 HAMMER2_RESOLVE_SHARED);
257 cparent = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS |
258 HAMMER2_RESOLVE_SHARED);
259 cluster = hammer2_cluster_lookup(cparent, &key_dummy,
260 lbase, lbase,
261 HAMMER2_LOOKUP_NODATA |
262 HAMMER2_LOOKUP_SHARED);
263 hammer2_inode_unlock(ip, cparent);
266 * Data is zero-fill if no cluster could be found
267 * (XXX or EIO on a cluster failure).
269 if (cluster == NULL) {
270 bp->b_resid = 0;
271 bp->b_error = 0;
272 bzero(bp->b_data, bp->b_bcount);
273 biodone(nbio);
274 return(0);
278 * Cluster elements must be type INODE or type DATA, but the
279 * compression mode (or not) for DATA chains can be different for
280 * each chain. This will be handled by the callback.
282 * If the cluster already has valid data the callback will be made
283 * immediately/synchronously.
285 btype = hammer2_cluster_type(cluster);
286 if (btype != HAMMER2_BREF_TYPE_INODE &&
287 btype != HAMMER2_BREF_TYPE_DATA) {
288 panic("READ PATH: hammer2_strategy_read: unknown bref type");
290 hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
291 nbio);
292 return(0);
296 * Read callback for hammer2_cluster_load_async(). The load function may
297 * start several actual I/Os but will only make one callback, typically with
298 * the first valid I/O XXX
300 static
301 void
302 hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
304 struct bio *bio = iocb->ptr; /* original logical buffer */
305 struct buf *bp = bio->bio_buf; /* original logical buffer */
306 hammer2_chain_t *chain;
307 hammer2_cluster_t *cluster;
308 hammer2_io_t *dio;
309 char *data;
310 int i;
313 * Extract data and handle iteration on I/O failure. iocb->off
314 * is the cluster index for iteration.
316 cluster = iocb->cluster;
317 dio = iocb->dio; /* can be NULL if iocb not in progress */
320 * Work to do if INPROG set, else dio is already good or dio is
321 * NULL (which is the shortcut case if chain->data is already good).
323 if (iocb->flags & HAMMER2_IOCB_INPROG) {
325 * Read attempt not yet made. Issue an asynchronous read
326 * if necessary and return, operation will chain back to
327 * this function.
329 if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
330 if (dio->bp == NULL ||
331 (dio->bp->b_flags & B_CACHE) == 0) {
332 if (dio->bp) {
333 bqrelse(dio->bp);
334 dio->bp = NULL;
336 iocb->flags |= HAMMER2_IOCB_READ;
337 breadcb(dio->hmp->devvp,
338 dio->pbase, dio->psize,
339 hammer2_io_callback, iocb);
340 return;
346 * If we have a DIO it is now done, check for an error and
347 * calculate the data.
349 * If there is no DIO it is an optimization by
350 * hammer2_cluster_load_async(), the data is available in
351 * chain->data.
353 if (dio) {
354 if (dio->bp->b_flags & B_ERROR) {
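/*
 * The device read failed.  Step to the next chain in the cluster
 * (iocb->lbase doubles as the iteration index) and issue another
 * I/O, or finish with an error once every chain has been tried.
 */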
355 i = (int)iocb->lbase + 1;
356 if (i >= cluster->nchains) {
357 bp->b_flags |= B_ERROR;
358 bp->b_error = dio->bp->b_error;
359 hammer2_io_complete(iocb);
360 biodone(bio);
361 hammer2_cluster_unlock(cluster);
362 hammer2_cluster_drop(cluster);
363 } else {
364 hammer2_io_complete(iocb); /* XXX */
365 chain = cluster->array[i].chain;
366 kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
367 hammer2_adjreadcounter(&chain->bref,
368 chain->bytes);
369 iocb->chain = chain;
370 iocb->lbase = (off_t)i;
371 iocb->flags = 0;
372 iocb->error = 0;
373 hammer2_io_getblk(chain->hmp,
374 chain->bref.data_off,
375 chain->bytes,
376 iocb);
378 return;
380 chain = iocb->chain;
381 data = hammer2_io_data(dio, chain->bref.data_off);
382 } else {
384 * Special synchronous case, data present in chain->data.
386 chain = iocb->chain;
387 data = (void *)chain->data;
390 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
392 * Data is embedded in the inode (copy from inode).
394 bcopy(((hammer2_inode_data_t *)data)->u.data,
395 bp->b_data, HAMMER2_EMBEDDED_BYTES);
396 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
397 bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
398 bp->b_resid = 0;
399 bp->b_error = 0;
400 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
402 * Data is on-media, issue device I/O and copy.
404 * XXX direct-IO shortcut could go here XXX.
406 switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
407 case HAMMER2_COMP_LZ4:
408 hammer2_decompress_LZ4_callback(data, chain->bytes,
409 bio);
410 break;
411 case HAMMER2_COMP_ZLIB:
412 hammer2_decompress_ZLIB_callback(data, chain->bytes,
413 bio);
414 break;
415 case HAMMER2_COMP_NONE:
416 KKASSERT(chain->bytes <= bp->b_bcount);
417 bcopy(data, bp->b_data, chain->bytes);
418 if (chain->bytes < bp->b_bcount) {
419 bzero(bp->b_data + chain->bytes,
420 bp->b_bcount - chain->bytes);
422 bp->b_flags |= B_NOTMETA;
423 bp->b_resid = 0;
424 bp->b_error = 0;
425 break;
426 default:
427 panic("hammer2_strategy_read: "
428 "unknown compression type");
430 } else {
431 /* bqrelse the dio to help stabilize the call to panic() */
432 if (dio)
433 hammer2_io_bqrelse(&dio);
434 panic("hammer2_strategy_read: unknown bref type");
438 * Once the iocb is cleaned up the DIO (if any) will no longer be
439 * in-progress but will still have a ref. Be sure to release
440 * the ref.
442 hammer2_io_complete(iocb); /* physical management */
443 if (dio) /* physical dio & buffer */
444 hammer2_io_bqrelse(&dio);
445 hammer2_cluster_unlock(cluster); /* cluster management */
446 hammer2_cluster_drop(cluster); /* cluster management */
447 biodone(bio); /* logical buffer */
450 /****************************************************************************
451 * WRITE SUPPORT *
452 ****************************************************************************/
455 * Functions for compression in threads,
456 * from hammer2_vnops.c
458 static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
459 hammer2_inode_t *ip,
460 hammer2_cluster_t *cparent,
461 hammer2_key_t lbase, int ioflag, int pblksize,
462 int *errorp);
463 static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
464 hammer2_inode_t *ip,
465 hammer2_cluster_t *cparent,
466 hammer2_key_t lbase, int ioflag,
467 int pblksize, int *errorp,
468 int comp_algo, int check_algo);
469 static void hammer2_zero_check_and_write(struct buf *bp,
470 hammer2_trans_t *trans, hammer2_inode_t *ip,
471 hammer2_cluster_t *cparent,
472 hammer2_key_t lbase,
473 int ioflag, int pblksize, int *errorp,
474 int check_algo);
475 static int test_block_zeros(const char *buf, size_t bytes);
476 static void zero_write(struct buf *bp, hammer2_trans_t *trans,
477 hammer2_inode_t *ip,
478 hammer2_cluster_t *cparent,
479 hammer2_key_t lbase,
480 int *errorp);
481 static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
482 int ioflag, int pblksize, int *errorp,
483 int check_algo);
486 static
488 hammer2_strategy_write(struct vop_strategy_args *ap)
490 hammer2_pfs_t *pmp;
491 struct bio *bio;
492 struct buf *bp;
493 hammer2_inode_t *ip;
495 bio = ap->a_bio;
496 bp = bio->bio_buf;
497 ip = VTOI(ap->a_vp);
498 pmp = ip->pmp;
500 hammer2_lwinprog_ref(pmp);
501 hammer2_trans_assert_strategy(pmp);
502 hammer2_mtx_ex(&pmp->wthread_mtx);
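/*
 * Queue the logical buffer to the per-PFS write thread.  The thread
 * only needs a wakeup when the queue was previously empty; otherwise
 * it is already working through the bioq.
 */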
503 if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
504 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
505 hammer2_mtx_unlock(&pmp->wthread_mtx);
506 wakeup(&pmp->wthread_bioq);
507 } else {
508 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
509 hammer2_mtx_unlock(&pmp->wthread_mtx);
511 hammer2_lwinprog_wait(pmp);
513 return(0);
517 * Thread to handle bioq for strategy write (started from hammer2_vfsops.c)
519 void
520 hammer2_write_thread(void *arg)
522 hammer2_pfs_t *pmp;
523 struct bio *bio;
524 struct buf *bp;
525 hammer2_trans_t trans;
526 struct vnode *vp;
527 hammer2_inode_t *ip;
528 hammer2_cluster_t *cparent;
529 hammer2_key_t lbase;
530 int lblksize;
531 int pblksize;
532 int error;
534 pmp = arg;
536 hammer2_mtx_ex(&pmp->wthread_mtx);
537 for (;;) {
539 * Wait for work. Break out and destroy the thread only if
540 * requested and no work remains.
542 if (bioq_first(&pmp->wthread_bioq) == NULL) {
543 if (pmp->wthread_destroy)
544 break;
545 mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
546 0, "h2bioqw", 0);
547 continue;
551 * Special transaction for logical buffer cache writes.
553 hammer2_trans_init(&trans, pmp, HAMMER2_TRANS_BUFCACHE);
555 while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
557 * dummy bio for synchronization. The transaction
558 * must be terminated.
560 if (bio->bio_buf == NULL) {
561 bio->bio_flags |= BIO_DONE;
562 /* bio will become invalid after DONE set */
563 wakeup(bio);
564 break;
568 * else normal bio processing
570 hammer2_mtx_unlock(&pmp->wthread_mtx);
572 hammer2_lwinprog_drop(pmp);
574 error = 0;
575 bp = bio->bio_buf;
576 vp = bp->b_vp;
577 ip = VTOI(vp);
580 * Inode is modified, flush size and mtime changes
581 * to ensure that the file size remains consistent
582 * with the buffers being flushed.
584 * NOTE: The inode_fsync() call only flushes the
585 * inode's meta-data state, it doesn't try
586 * to flush underlying buffers or chains.
588 * NOTE: hammer2_write_file_core() may indirectly
589 * modify and modsync the inode.
591 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
592 cparent = hammer2_inode_cluster(ip,
593 HAMMER2_RESOLVE_ALWAYS);
594 if (ip->flags & (HAMMER2_INODE_RESIZED |
595 HAMMER2_INODE_MTIME)) {
596 hammer2_inode_fsync(&trans, ip, cparent);
598 lblksize = hammer2_calc_logical(ip, bio->bio_offset,
599 &lbase, NULL);
600 pblksize = hammer2_calc_physical(ip, lbase);
601 hammer2_write_file_core(bp, &trans, ip,
602 cparent,
603 lbase, IO_ASYNC,
604 pblksize, &error);
605 hammer2_inode_unlock(ip, cparent);
606 if (error) {
607 kprintf("hammer2: error in buffer write\n");
608 bp->b_flags |= B_ERROR;
609 bp->b_error = EIO;
611 biodone(bio);
612 hammer2_mtx_ex(&pmp->wthread_mtx);
614 hammer2_trans_done(&trans);
616 pmp->wthread_destroy = -1;
617 wakeup(&pmp->wthread_destroy);
619 hammer2_mtx_unlock(&pmp->wthread_mtx);
623 * Wait for pending I/O to complete
625 void
626 hammer2_bioq_sync(hammer2_pfs_t *pmp)
628 struct bio sync_bio;
630 bzero(&sync_bio, sizeof(sync_bio)); /* dummy with no bio_buf */
631 hammer2_mtx_ex(&pmp->wthread_mtx);
632 if (pmp->wthread_destroy == 0 &&
633 TAILQ_FIRST(&pmp->wthread_bioq.queue)) {
634 bioq_insert_tail(&pmp->wthread_bioq, &sync_bio);
635 while ((sync_bio.bio_flags & BIO_DONE) == 0)
636 mtxsleep(&sync_bio, &pmp->wthread_mtx, 0, "h2bioq", 0);
638 hammer2_mtx_unlock(&pmp->wthread_mtx);
642 * Create a new cluster at (cparent, lbase) and assign physical storage,
643 * returning a cluster suitable for I/O. The cluster will be in a modified
644 * state.
646 * cparent can wind up being anything.
648 * NOTE: Special case for data embedded in inode.
650 static
651 hammer2_cluster_t *
652 hammer2_assign_physical(hammer2_trans_t *trans,
653 hammer2_inode_t *ip, hammer2_cluster_t *cparent,
654 hammer2_key_t lbase, int pblksize, int *errorp)
656 hammer2_cluster_t *cluster;
657 hammer2_cluster_t *dparent;
658 hammer2_key_t key_dummy;
659 int pradix = hammer2_getradix(pblksize);
662 * Locate the chain associated with lbase, return a locked chain.
663 * However, do not instantiate any data reference (which utilizes a
664 * device buffer) because we will be using direct IO via the
665 * logical buffer cache buffer.
667 *errorp = 0;
668 KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
669 retry:
670 dparent = hammer2_cluster_lookup_init(cparent, 0);
671 cluster = hammer2_cluster_lookup(dparent, &key_dummy,
672 lbase, lbase,
673 HAMMER2_LOOKUP_NODATA);
675 if (cluster == NULL) {
677 * We found a hole, create a new chain entry.
679 * NOTE: DATA chains are created without device backing
680 * store (nor do we want any).
682 *errorp = hammer2_cluster_create(trans, dparent, &cluster,
683 lbase, HAMMER2_PBUFRADIX,
684 HAMMER2_BREF_TYPE_DATA,
685 pblksize, 0);
686 if (cluster == NULL) {
687 hammer2_cluster_lookup_done(dparent);
688 panic("hammer2_cluster_create: par=%p error=%d\n",
689 dparent->focus, *errorp);
690 goto retry;
692 /*ip->delta_dcount += pblksize;*/
693 } else {
694 switch (hammer2_cluster_type(cluster)) {
695 case HAMMER2_BREF_TYPE_INODE:
697 * The data is embedded in the inode, which requires
698 * a bit more finesse.
700 hammer2_cluster_modify_ip(trans, ip, cluster, 0);
701 break;
702 case HAMMER2_BREF_TYPE_DATA:
703 if (hammer2_cluster_need_resize(cluster, pblksize)) {
704 hammer2_cluster_resize(trans, ip,
705 dparent, cluster,
706 pradix,
707 HAMMER2_MODIFY_OPTDATA);
711 * DATA buffers must be marked modified whether the
712 * data is in a logical buffer or not. We also have
713 * to make this call to fixup the chain data pointers
714 * after resizing in case this is an encrypted or
715 * compressed buffer.
717 hammer2_cluster_modify(trans, cluster,
718 HAMMER2_MODIFY_OPTDATA);
719 break;
720 default:
721 panic("hammer2_assign_physical: bad type");
722 /* NOT REACHED */
723 break;
728 * Cleanup. If cluster wound up being the inode itself, i.e.
729 * the DIRECTDATA case for offset 0, then we need to update cparent.
730 * The caller expects cparent to not become stale.
732 hammer2_cluster_lookup_done(dparent);
733 /* dparent = NULL; safety */
734 return (cluster);
738 * hammer2_write_file_core() - hammer2_write_thread() helper
740 * The core write function which determines which path to take
741 * depending on compression settings. We also have to locate the
742 * related clusters so we can calculate and set the check data for
743 * the blockref.
745 static
746 void
747 hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
748 hammer2_inode_t *ip,
749 hammer2_cluster_t *cparent,
750 hammer2_key_t lbase, int ioflag, int pblksize,
751 int *errorp)
753 hammer2_cluster_t *cluster;
755 switch(HAMMER2_DEC_ALGO(ip->meta.comp_algo)) {
756 case HAMMER2_COMP_NONE:
758 * We have to assign physical storage to the buffer
759 * we intend to dirty or write now to avoid deadlocks
760 * in the strategy code later.
762 * This can return NOOFFSET for inode-embedded data.
763 * The strategy code will take care of it in that case.
765 cluster = hammer2_assign_physical(trans, ip, cparent,
766 lbase, pblksize,
767 errorp);
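/*
 * ddflag indicates the data is embedded directly in the inode
 * (DIRECTDATA mode); copy the logical buffer into the inode data
 * area instead of writing a separate data block.
 */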
768 if (cluster->ddflag) {
769 hammer2_inode_data_t *wipdata;
771 wipdata = hammer2_cluster_modify_ip(trans, ip,
772 cluster, 0);
773 KKASSERT(wipdata->meta.op_flags &
774 HAMMER2_OPFLAG_DIRECTDATA);
775 KKASSERT(bp->b_loffset == 0);
776 bcopy(bp->b_data, wipdata->u.data,
777 HAMMER2_EMBEDDED_BYTES);
778 hammer2_cluster_modsync(cluster);
779 } else {
780 hammer2_write_bp(cluster, bp, ioflag, pblksize,
781 errorp, ip->meta.check_algo);
783 if (cluster) {
784 hammer2_cluster_unlock(cluster);
785 hammer2_cluster_drop(cluster);
787 break;
788 case HAMMER2_COMP_AUTOZERO:
790 * Check for zero-fill only
792 hammer2_zero_check_and_write(bp, trans, ip,
793 cparent, lbase,
794 ioflag, pblksize, errorp,
795 ip->meta.check_algo);
796 break;
797 case HAMMER2_COMP_LZ4:
798 case HAMMER2_COMP_ZLIB:
799 default:
801 * Check for zero-fill and attempt compression.
803 hammer2_compress_and_write(bp, trans, ip,
804 cparent,
805 lbase, ioflag,
806 pblksize, errorp,
807 ip->meta.comp_algo,
808 ip->meta.check_algo);
809 break;
814 * Helper
816 * Generic function that performs the compression in the compressed
817 * write path. The compression algorithm is determined by the settings
818 * obtained from the inode.
820 static
821 void
822 hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
823 hammer2_inode_t *ip,
824 hammer2_cluster_t *cparent,
825 hammer2_key_t lbase, int ioflag, int pblksize,
826 int *errorp, int comp_algo, int check_algo)
828 hammer2_cluster_t *cluster;
829 hammer2_chain_t *chain;
830 int comp_size;
831 int comp_block_size;
832 int i;
833 char *comp_buffer;
835 if (test_block_zeros(bp->b_data, pblksize)) {
836 zero_write(bp, trans, ip, cparent, lbase, errorp);
837 return;
840 comp_size = 0;
841 comp_buffer = NULL;
843 KKASSERT(pblksize / 2 <= 32768);
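/*
 * Compression is only attempted when the result fits in half the
 * physical block, and only while the heuristic indicates the data is
 * compressing; after repeated failures only every 8th block is
 * re-tested (see the comp_heuristic handling below).
 */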
845 if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
846 z_stream strm_compress;
847 int comp_level;
848 int ret;
850 switch(HAMMER2_DEC_ALGO(comp_algo)) {
851 case HAMMER2_COMP_LZ4:
852 comp_buffer = objcache_get(cache_buffer_write,
853 M_INTWAIT);
854 comp_size = LZ4_compress_limitedOutput(
855 bp->b_data,
856 &comp_buffer[sizeof(int)],
857 pblksize,
858 pblksize / 2 - sizeof(int));
860 * We need to prefix with the size, LZ4
861 * doesn't do it for us. Add the related
862 * overhead.
864 *(int *)comp_buffer = comp_size;
865 if (comp_size)
866 comp_size += sizeof(int);
867 break;
868 case HAMMER2_COMP_ZLIB:
869 comp_level = HAMMER2_DEC_LEVEL(comp_algo);
870 if (comp_level == 0)
871 comp_level = 6; /* default zlib compression */
872 else if (comp_level < 6)
873 comp_level = 6;
874 else if (comp_level > 9)
875 comp_level = 9;
876 ret = deflateInit(&strm_compress, comp_level);
877 if (ret != Z_OK) {
878 kprintf("HAMMER2 ZLIB: fatal error "
879 "on deflateInit.\n");
882 comp_buffer = objcache_get(cache_buffer_write,
883 M_INTWAIT);
884 strm_compress.next_in = bp->b_data;
885 strm_compress.avail_in = pblksize;
886 strm_compress.next_out = comp_buffer;
887 strm_compress.avail_out = pblksize / 2;
888 ret = deflate(&strm_compress, Z_FINISH);
889 if (ret == Z_STREAM_END) {
890 comp_size = pblksize / 2 -
891 strm_compress.avail_out;
892 } else {
893 comp_size = 0;
895 ret = deflateEnd(&strm_compress);
896 break;
897 default:
898 kprintf("Error: Unknown compression method.\n");
899 kprintf("Comp_method = %d.\n", comp_algo);
900 break;
904 if (comp_size == 0) {
906 * compression failed or turned off
908 comp_block_size = pblksize; /* safety */
909 if (++ip->comp_heuristic > 128)
910 ip->comp_heuristic = 8;
911 } else {
913 * compression succeeded
915 ip->comp_heuristic = 0;
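/*
 * Round the compressed size up to the smallest physical block size
 * that holds it (1KB through 32KB, powers of 2).
 */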
916 if (comp_size <= 1024) {
917 comp_block_size = 1024;
918 } else if (comp_size <= 2048) {
919 comp_block_size = 2048;
920 } else if (comp_size <= 4096) {
921 comp_block_size = 4096;
922 } else if (comp_size <= 8192) {
923 comp_block_size = 8192;
924 } else if (comp_size <= 16384) {
925 comp_block_size = 16384;
926 } else if (comp_size <= 32768) {
927 comp_block_size = 32768;
928 } else {
929 panic("hammer2: WRITE PATH: "
930 "Weird comp_size value.");
931 /* NOT REACHED */
932 comp_block_size = pblksize;
936 cluster = hammer2_assign_physical(trans, ip, cparent,
937 lbase, comp_block_size,
938 errorp);
939 if (*errorp) {
940 kprintf("WRITE PATH: An error occurred while "
941 "assigning physical space.\n");
942 KKASSERT(cluster == NULL);
943 goto done;
946 if (cluster->ddflag) {
947 hammer2_inode_data_t *wipdata;
949 wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
950 KKASSERT(wipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA);
951 KKASSERT(bp->b_loffset == 0);
952 bcopy(bp->b_data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
953 hammer2_cluster_modsync(cluster);
954 } else
955 for (i = 0; i < cluster->nchains; ++i) {
956 hammer2_io_t *dio;
957 char *bdata;
959 /* XXX hackx */
961 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
962 continue;
963 chain = cluster->array[i].chain; /* XXX */
964 if (chain == NULL)
965 continue;
966 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
968 switch(chain->bref.type) {
969 case HAMMER2_BREF_TYPE_INODE:
970 panic("hammer2_write_bp: unexpected inode\n");
971 break;
972 case HAMMER2_BREF_TYPE_DATA:
974 * Optimize out the read-before-write
975 * if possible.
977 *errorp = hammer2_io_newnz(chain->hmp,
978 chain->bref.data_off,
979 chain->bytes,
980 &dio);
981 if (*errorp) {
982 hammer2_io_brelse(&dio);
983 kprintf("hammer2: WRITE PATH: "
984 "dbp bread error\n");
985 break;
987 bdata = hammer2_io_data(dio, chain->bref.data_off);
990 * When loading the block make sure we don't
991 * leave garbage after the compressed data.
993 if (comp_size) {
994 chain->bref.methods =
995 HAMMER2_ENC_COMP(comp_algo) +
996 HAMMER2_ENC_CHECK(check_algo);
997 bcopy(comp_buffer, bdata, comp_size);
998 if (comp_size != comp_block_size) {
999 bzero(bdata + comp_size,
1000 comp_block_size - comp_size);
1002 } else {
1003 chain->bref.methods =
1004 HAMMER2_ENC_COMP(
1005 HAMMER2_COMP_NONE) +
1006 HAMMER2_ENC_CHECK(check_algo);
1007 bcopy(bp->b_data, bdata, pblksize);
1011 * The flush code doesn't calculate check codes for
1012 * file data (doing so can result in excessive I/O),
1013 * so we do it here.
1015 hammer2_chain_setcheck(chain, bdata);
1018 * Device buffer is now valid, chain is no longer in
1019 * the initial state.
1021 * (No blockref table worries with file data)
1023 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
1025 /* Now write the related bdp. */
1026 if (ioflag & IO_SYNC) {
1028 * Synchronous I/O requested.
1030 hammer2_io_bwrite(&dio);
1031 /*
1032 } else if ((ioflag & IO_DIRECT) &&
1033 loff + n == pblksize) {
1034 hammer2_io_bdwrite(&dio);
1035 */
1036 } else if (ioflag & IO_ASYNC) {
1037 hammer2_io_bawrite(&dio);
1038 } else {
1039 hammer2_io_bdwrite(&dio);
1041 break;
1042 default:
1043 panic("hammer2_write_bp: bad chain type %d\n",
1044 chain->bref.type);
1045 /* NOT REACHED */
1046 break;
1049 done:
1050 if (cluster) {
1051 hammer2_cluster_unlock(cluster);
1052 hammer2_cluster_drop(cluster);
1054 if (comp_buffer)
1055 objcache_put(cache_buffer_write, comp_buffer);
1059 * Helper
1061 * Function that performs zero-checking and writing without compression;
1062 * it corresponds to the default zero-checking path.
1064 static
1065 void
1066 hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
1067 hammer2_inode_t *ip,
1068 hammer2_cluster_t *cparent,
1069 hammer2_key_t lbase, int ioflag, int pblksize, int *errorp,
1070 int check_algo)
1072 hammer2_cluster_t *cluster;
1074 if (test_block_zeros(bp->b_data, pblksize)) {
1075 zero_write(bp, trans, ip, cparent, lbase, errorp);
1076 } else {
1077 cluster = hammer2_assign_physical(trans, ip, cparent,
1078 lbase, pblksize, errorp);
1079 hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp,
1080 check_algo);
1081 if (cluster) {
1082 hammer2_cluster_unlock(cluster);
1083 hammer2_cluster_drop(cluster);
1089 * Helper
1091 * A function to test whether a block of data contains only zeros;
1092 * returns TRUE (non-zero) if the block is all zeros.
1094 static
1096 test_block_zeros(const char *buf, size_t bytes)
1098 size_t i;
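/*
 * Scan in long-sized strides; callers pass physical block sizes,
 * which are multiples of sizeof(long).
 */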
1100 for (i = 0; i < bytes; i += sizeof(long)) {
1101 if (*(const long *)(buf + i) != 0)
1102 return (0);
1104 return (1);
1108 * Helper
1110 * Function to "write" a block that contains only zeros.
1112 static
1113 void
1114 zero_write(struct buf *bp, hammer2_trans_t *trans,
1115 hammer2_inode_t *ip,
1116 hammer2_cluster_t *cparent,
1117 hammer2_key_t lbase, int *errorp __unused)
1119 hammer2_cluster_t *cluster;
1120 hammer2_key_t key_dummy;
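/*
 * A zero-filled block is represented by the absence of a data chain:
 * any existing chain at lbase is deleted (or the embedded inode data
 * is zeroed), and the read path then zero-fills the logical buffer.
 */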
1122 cparent = hammer2_cluster_lookup_init(cparent, 0);
1123 cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
1124 HAMMER2_LOOKUP_NODATA);
1125 if (cluster) {
1126 if (cluster->ddflag) {
1127 hammer2_inode_data_t *wipdata;
1129 wipdata = hammer2_cluster_modify_ip(trans, ip,
1130 cluster, 0);
1131 KKASSERT(wipdata->meta.op_flags &
1132 HAMMER2_OPFLAG_DIRECTDATA);
1133 KKASSERT(bp->b_loffset == 0);
1134 bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
1135 hammer2_cluster_modsync(cluster);
1136 } else {
1137 hammer2_cluster_delete(trans, cparent, cluster,
1138 HAMMER2_DELETE_PERMANENT);
1140 hammer2_cluster_unlock(cluster);
1141 hammer2_cluster_drop(cluster);
1143 hammer2_cluster_lookup_done(cparent);
1147 * Helper
1149 * Function to write the data as-is, without performing any sort of
1150 * compression. This function is used in the no-compression path and
1151 * in the default zero-checking path.
1153 static
1154 void
1155 hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
1156 int pblksize, int *errorp, int check_algo)
1158 hammer2_chain_t *chain;
1159 hammer2_inode_data_t *wipdata;
1160 hammer2_io_t *dio;
1161 char *bdata;
1162 int error;
1163 int i;
1165 error = 0; /* XXX TODO below */
1167 for (i = 0; i < cluster->nchains; ++i) {
1168 if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
1169 continue;
1170 chain = cluster->array[i].chain; /* XXX */
1171 if (chain == NULL)
1172 continue;
1173 KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
1175 switch(chain->bref.type) {
1176 case HAMMER2_BREF_TYPE_INODE:
1177 wipdata = &hammer2_chain_wdata(chain)->ipdata;
1178 KKASSERT(wipdata->meta.op_flags &
1179 HAMMER2_OPFLAG_DIRECTDATA);
1180 KKASSERT(bp->b_loffset == 0);
1181 bcopy(bp->b_data, wipdata->u.data,
1182 HAMMER2_EMBEDDED_BYTES);
1183 error = 0;
1184 break;
1185 case HAMMER2_BREF_TYPE_DATA:
1186 error = hammer2_io_newnz(chain->hmp,
1187 chain->bref.data_off,
1188 chain->bytes, &dio);
1189 if (error) {
1190 hammer2_io_bqrelse(&dio);
1191 kprintf("hammer2: WRITE PATH: "
1192 "dbp bread error\n");
1193 break;
1195 bdata = hammer2_io_data(dio, chain->bref.data_off);
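/*
 * bref.methods encodes both the compression type (none here) and
 * the check algorithm for this block.
 */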
1197 chain->bref.methods = HAMMER2_ENC_COMP(
1198 HAMMER2_COMP_NONE) +
1199 HAMMER2_ENC_CHECK(check_algo);
1200 bcopy(bp->b_data, bdata, chain->bytes);
1203 * The flush code doesn't calculate check codes for
1204 * file data (doing so can result in excessive I/O),
1205 * so we do it here.
1207 hammer2_chain_setcheck(chain, bdata);
1210 * Device buffer is now valid, chain is no longer in
1211 * the initial state.
1213 * (No blockref table worries with file data)
1215 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
1217 if (ioflag & IO_SYNC) {
1219 * Synchronous I/O requested.
1221 hammer2_io_bwrite(&dio);
1222 /*
1223 } else if ((ioflag & IO_DIRECT) &&
1224 loff + n == pblksize) {
1225 hammer2_io_bdwrite(&dio);
1226 */
1227 } else if (ioflag & IO_ASYNC) {
1228 hammer2_io_bawrite(&dio);
1229 } else {
1230 hammer2_io_bdwrite(&dio);
1232 break;
1233 default:
1234 panic("hammer2_write_bp: bad chain type %d\n",
1235 chain->bref.type);
1236 /* NOT REACHED */
1237 error = 0;
1238 break;
1240 KKASSERT(error == 0); /* XXX TODO */
1242 *errorp = error;