/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module handles low level logical file I/O (strategy) which backs
 * the logical buffer cache.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/objcache.h>
#include <sys/event.h>

#include <vfs/fifofs/fifo.h>

#include "hammer2.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"
struct objcache *cache_buffer_read;
struct objcache *cache_buffer_write;
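/*
 * NOTE: The two object caches above supply the scratch buffers used by
 *	 the compression and decompression paths below.  Each buffer must
 *	 be able to hold a full physical block (up to HAMMER2_PBUFSIZE);
 *	 the caches themselves are created during module/mount setup
 *	 (hammer2_vfsops.c in this source layout).
 */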
/*
 * Strategy code (async logical file buffer I/O from system)
 *
 * WARNING: The strategy code cannot safely use hammer2 transactions
 *	    as this can deadlock against vfs_sync's vfsync() call
 *	    if multiple flushes are queued.  All H2 structures must
 *	    already be present and ready for the DIO.
 *
 *	    Reads can be initiated asynchronously, writes have to be
 *	    spooled to a separate thread for action to avoid deadlocks.
 */
static int hammer2_strategy_read(struct vop_strategy_args *ap);
static int hammer2_strategy_write(struct vop_strategy_args *ap);
static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);
int
hammer2_vop_strategy(struct vop_strategy_args *ap)
{
	struct bio *biop;
	struct buf *bp;
	int error;

	biop = ap->a_bio;
	bp = biop->bio_buf;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer2_strategy_read(ap);
		++hammer2_iod_file_read;
		break;
	case BUF_CMD_WRITE:
		error = hammer2_strategy_write(ap);
		++hammer2_iod_file_write;
		break;
	default:
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(biop);
		break;
	}
	return (error);
}
/*
 * Return the largest contiguous physical disk range for the logical
 * request, in bytes.
 *
 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
 *
 * Basically disabled, the logical buffer write thread has to deal with
 * buffers one-at-a-time.
 */
int
hammer2_vop_bmap(struct vop_bmap_args *ap)
{
	*ap->a_doffsetp = NOOFFSET;
	if (ap->a_runp)
		*ap->a_runp = 0;
	if (ap->a_runb)
		*ap->a_runb = 0;
	return (EOPNOTSUPP);
}
/****************************************************************************
 *				READ SUPPORT				    *
 ****************************************************************************/
/*
 * Callback used in read path in case that a block is compressed with LZ4.
 */
static
void
hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
{
	struct buf *bp;
	char *compressed_buffer;
	int compressed_size;
	int result;

	bp = bio->bio_buf;

#if 0
	if bio->bio_caller_info2.index &&
	      bio->bio_caller_info1.uvalue32 !=
	      crc32(bp->b_data, bp->b_bufsize) --- return error
#endif

	KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
	compressed_size = *(const int *)data;
	KKASSERT(compressed_size <= bytes - sizeof(int));

	compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
	result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
				     compressed_buffer,
				     compressed_size,
				     bp->b_bufsize);
	if (result < 0) {
		kprintf("READ PATH: Error during decompression."
			"bio %016jx/%d\n",
			(intmax_t)bio->bio_offset, bytes);
		/* make sure it isn't random garbage */
		bzero(compressed_buffer, bp->b_bufsize);
	}
	KKASSERT(result <= bp->b_bufsize);
	bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
	if (result < bp->b_bufsize)
		bzero(bp->b_data + result, bp->b_bufsize - result);
	objcache_put(cache_buffer_read, compressed_buffer);
	bp->b_flags |= B_AGE;
}
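/*
 * NOTE: On-media framing consumed above, as produced by
 *	 hammer2_compress_and_write() below: the first sizeof(int) bytes
 *	 of the physical block hold the compressed length and the LZ4
 *	 payload follows.  For example, a 16KB logical block that
 *	 compresses to 3000 bytes is stored as [int 3000][3000 LZ4 bytes]
 *	 in a 4096-byte physical block, with the remainder zero-filled.
 */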
/*
 * Callback used in read path in case that a block is compressed with ZLIB.
 * It is almost identical to LZ4 callback, so in theory they can be unified,
 * but we didn't want to make changes in bio structure for that.
 */
static
void
hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
{
	struct buf *bp;
	char *compressed_buffer;
	z_stream strm_decompress;
	int result;
	int ret;

	bp = bio->bio_buf;

	KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
	strm_decompress.avail_in = 0;
	strm_decompress.next_in = Z_NULL;

	ret = inflateInit(&strm_decompress);

	if (ret != Z_OK)
		kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");

	compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
	strm_decompress.next_in = __DECONST(char *, data);

	/* XXX supply proper size, subset of device bp */
	strm_decompress.avail_in = bytes;
	strm_decompress.next_out = compressed_buffer;
	strm_decompress.avail_out = bp->b_bufsize;

	ret = inflate(&strm_decompress, Z_FINISH);
	if (ret != Z_STREAM_END) {
		kprintf("HAMMER2 ZLIB: Fatal error during decompression.\n");
		bzero(compressed_buffer, bp->b_bufsize);
	}
	bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
	result = bp->b_bufsize - strm_decompress.avail_out;
	if (result < bp->b_bufsize)
		bzero(bp->b_data + result, strm_decompress.avail_out);
	objcache_put(cache_buffer_read, compressed_buffer);
	ret = inflateEnd(&strm_decompress);

	bp->b_flags |= B_AGE;
}
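/*
 * NOTE: inflate() is driven single-shot with Z_FINISH above: the entire
 *	 compressed block is presented in one call, so anything other
 *	 than Z_STREAM_END indicates a truncated or corrupt block and the
 *	 output buffer is zeroed rather than handed to the caller as-is.
 */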
/*
 * Logical buffer I/O, async read.
 */
static
int
hammer2_strategy_read(struct vop_strategy_args *ap)
{
	struct buf *bp;
	struct bio *bio;
	struct bio *nbio;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lbase;
	uint8_t btype;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	nbio = push_bio(bio);

	lbase = bio->bio_offset;
	KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);

	/*
	 * Lookup the file offset.
	 */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
			       HAMMER2_RESOLVE_SHARED);
	cparent = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS |
					    HAMMER2_RESOLVE_SHARED);
	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
					 lbase, lbase,
					 HAMMER2_LOOKUP_NODATA |
					 HAMMER2_LOOKUP_SHARED);
	hammer2_inode_unlock(ip, cparent);

	/*
	 * Data is zero-fill if no cluster could be found
	 * (XXX or EIO on a cluster failure).
	 */
	if (cluster == NULL) {
		bp->b_resid = 0;
		bp->b_error = 0;
		bzero(bp->b_data, bp->b_bcount);
		biodone(nbio);
		return(0);
	}

	/*
	 * Cluster elements must be type INODE or type DATA, but the
	 * compression mode (or not) for DATA chains can be different for
	 * each chain.  This will be handled by the callback.
	 *
	 * If the cluster already has valid data the callback will be made
	 * immediately/synchronously.
	 */
	btype = hammer2_cluster_type(cluster);
	if (btype != HAMMER2_BREF_TYPE_INODE &&
	    btype != HAMMER2_BREF_TYPE_DATA) {
		panic("READ PATH: hammer2_strategy_read: unknown bref type");
	}
	hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
				   nbio);
	return(0);
}
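/*
 * NOTE: File data blockrefs are keyed on the logical byte offset, so the
 *	 lookup above uses key_beg == key_end == lbase to select exactly
 *	 the one block backing this buffer, with lbase required to be
 *	 physical-block aligned (see the HAMMER2_PBUFMASK assertion).
 */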
/*
 * Read callback for hammer2_cluster_load_async().  The load function may
 * start several actual I/Os but will only make one callback, typically with
 * the first valid I/O XXX
 */
static
void
hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
{
	struct bio *bio = iocb->ptr;	/* original logical buffer */
	struct buf *bp = bio->bio_buf;	/* original logical buffer */
	hammer2_chain_t *chain;
	hammer2_cluster_t *cluster;
	hammer2_io_t *dio;
	char *data;
	int i;

	/*
	 * Extract data and handle iteration on I/O failure.  iocb->off
	 * is the cluster index for iteration.
	 */
	cluster = iocb->cluster;
	dio = iocb->dio;	/* can be NULL if iocb not in progress */

	/*
	 * Work to do if INPROG set, else dio is already good or dio is
	 * NULL (which is the shortcut case if chain->data is already good).
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		/*
		 * Read attempt not yet made.  Issue an asynchronous read
		 * if necessary and return, operation will chain back to
		 * this function.
		 */
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (dio->bp == NULL ||
			    (dio->bp->b_flags & B_CACHE) == 0) {
				if (dio->bp) {
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
				iocb->flags |= HAMMER2_IOCB_READ;
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			}
		}
	}

	/*
	 * If we have a DIO it is now done, check for an error and
	 * calculate the data.
	 *
	 * If there is no DIO it is an optimization by
	 * hammer2_cluster_load_async(), the data is available in
	 * chain->data.
	 */
	if (dio) {
		if (dio->bp->b_flags & B_ERROR) {
			i = (int)iocb->lbase + 1;
			if (i >= cluster->nchains) {
				bp->b_flags |= B_ERROR;
				bp->b_error = dio->bp->b_error;
				hammer2_io_complete(iocb);
				biodone(bio);
				hammer2_cluster_unlock(cluster);
				hammer2_cluster_drop(cluster);
			} else {
				hammer2_io_complete(iocb); /* XXX */
				chain = cluster->array[i].chain;
				kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
				hammer2_adjreadcounter(&chain->bref,
						       chain->bytes);
				iocb->chain = chain;
				iocb->lbase = (off_t)i;
				iocb->flags = 0;
				iocb->error = 0;
				hammer2_io_getblk(chain->hmp,
						  chain->bref.data_off,
						  chain->bytes,
						  iocb);
			}
			return;
		}
		chain = iocb->chain;
		data = hammer2_io_data(dio, chain->bref.data_off);
	} else {
		/*
		 * Special synchronous case, data present in chain->data.
		 */
		chain = iocb->chain;
		data = (void *)chain->data;
	}

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
		case HAMMER2_COMP_LZ4:
			hammer2_decompress_LZ4_callback(data, chain->bytes,
							bio);
			break;
		case HAMMER2_COMP_ZLIB:
			hammer2_decompress_ZLIB_callback(data, chain->bytes,
							 bio);
			break;
		case HAMMER2_COMP_NONE:
			KKASSERT(chain->bytes <= bp->b_bcount);
			bcopy(data, bp->b_data, chain->bytes);
			if (chain->bytes < bp->b_bcount) {
				bzero(bp->b_data + chain->bytes,
				      bp->b_bcount - chain->bytes);
			}
			bp->b_flags |= B_NOTMETA;
			bp->b_resid = 0;
			bp->b_error = 0;
			break;
		default:
			panic("hammer2_strategy_read: "
			      "unknown compression type");
		}
	} else {
		/* bqrelse the dio to help stabilize the call to panic() */
		if (dio)
			hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");
	}

	/*
	 * Once the iocb is cleaned up the DIO (if any) will no longer be
	 * in-progress but will still have a ref.  Be sure to release
	 * the ref.
	 */
	hammer2_io_complete(iocb);		/* physical management */
	if (dio)				/* physical dio & buffer */
		hammer2_io_bqrelse(&dio);
	hammer2_cluster_unlock(cluster);	/* cluster management */
	hammer2_cluster_drop(cluster);		/* cluster management */
	biodone(bio);				/* logical buffer */
}
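/*
 * NOTE: iocb state machine recap for the callback above: on first entry
 *	 with HAMMER2_IOCB_INPROG set and no read yet issued, breadcb()
 *	 fires an asynchronous device read and the callback re-enters
 *	 here with HAMMER2_IOCB_READ set; on a device error the code
 *	 advances iocb->lbase to try the next chain in the cluster before
 *	 giving up and flagging the logical buffer B_ERROR.
 */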
/****************************************************************************
 *				WRITE SUPPORT				    *
 ****************************************************************************/
/*
 * Functions for compression in threads,
 * from hammer2_vnops.c
 */
static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase, int ioflag, int pblksize,
				int *errorp);
static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase, int ioflag,
				int pblksize, int *errorp,
				int comp_algo, int check_algo);
static void hammer2_zero_check_and_write(struct buf *bp,
				hammer2_trans_t *trans, hammer2_inode_t *ip,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase,
				int ioflag, int pblksize, int *errorp,
				int check_algo);
static int test_block_zeros(const char *buf, size_t bytes);
static void zero_write(struct buf *bp, hammer2_trans_t *trans,
				hammer2_inode_t *ip,
				hammer2_cluster_t *cparent,
				hammer2_key_t lbase,
				int *errorp);
static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
				int ioflag, int pblksize, int *errorp,
				int check_algo);
static
int
hammer2_strategy_write(struct vop_strategy_args *ap)
{
	hammer2_pfs_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_inode_t *ip;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	pmp = ip->pmp;

	hammer2_lwinprog_ref(pmp);
	hammer2_trans_assert_strategy(pmp);
	hammer2_mtx_ex(&pmp->wthread_mtx);
	if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		hammer2_mtx_unlock(&pmp->wthread_mtx);
		wakeup(&pmp->wthread_bioq);
	} else {
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		hammer2_mtx_unlock(&pmp->wthread_mtx);
	}
	hammer2_lwinprog_wait(pmp);

	return(0);
}
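/*
 * NOTE: Both branches above queue the bio; the wakeup() is only issued
 *	 when the queue was previously empty, since a non-empty queue
 *	 means the write thread is already awake (or has a wakeup
 *	 pending) and will drain the new entry anyway.
 */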
/*
 * Thread to handle bioq for strategy write (started from hammer2_vfsops.c)
 */
void
hammer2_write_thread(void *arg)
{
	hammer2_pfs_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_trans_t trans;
	struct vnode *vp;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	hammer2_key_t lbase;
	int lblksize;
	int pblksize;
	int error;

	pmp = arg;

	hammer2_mtx_ex(&pmp->wthread_mtx);
	for (;;) {
		/*
		 * Wait for work.  Break out and destroy the thread only if
		 * requested and no work remains.
		 */
		if (bioq_first(&pmp->wthread_bioq) == NULL) {
			if (pmp->wthread_destroy)
				break;
			mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
				 0, "h2bioq", 0);
			continue;
		}

		/*
		 * Special transaction for logical buffer cache writes.
		 */
		hammer2_trans_init(&trans, pmp, HAMMER2_TRANS_BUFCACHE);

		while ((bio = bioq_takefirst(&pmp->wthread_bioq)) != NULL) {
			/*
			 * dummy bio for synchronization.  The transaction
			 * must be terminated.
			 */
			if (bio->bio_buf == NULL) {
				bio->bio_flags |= BIO_DONE;
				/* bio will become invalid after DONE set */
				wakeup(bio);
				break;
			}

			/*
			 * else normal bio processing
			 */
			hammer2_mtx_unlock(&pmp->wthread_mtx);

			hammer2_lwinprog_drop(pmp);

			error = 0;
			bp = bio->bio_buf;
			vp = bp->b_vp;
			ip = VTOI(vp);

			/*
			 * Inode is modified, flush size and mtime changes
			 * to ensure that the file size remains consistent
			 * with the buffers being flushed.
			 *
			 * NOTE: The inode_fsync() call only flushes the
			 *	 inode's meta-data state, it doesn't try
			 *	 to flush underlying buffers or chains.
			 *
			 * NOTE: hammer2_write_file_core() may indirectly
			 *	 modify and modsync the inode.
			 */
			hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
			cparent = hammer2_inode_cluster(ip,
						HAMMER2_RESOLVE_ALWAYS);
			if (ip->flags & (HAMMER2_INODE_RESIZED |
					 HAMMER2_INODE_MTIME)) {
				hammer2_inode_fsync(&trans, ip, cparent);
			}
			lblksize = hammer2_calc_logical(ip, bio->bio_offset,
							&lbase, NULL);
			pblksize = hammer2_calc_physical(ip, lbase);
			hammer2_write_file_core(bp, &trans, ip,
						cparent,
						lbase, IO_ASYNC, pblksize,
						&error);
			hammer2_inode_unlock(ip, cparent);
			if (error) {
				kprintf("hammer2: error in buffer write\n");
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bio);
			hammer2_mtx_ex(&pmp->wthread_mtx);
		}
		hammer2_trans_done(&trans);
	}
	pmp->wthread_destroy = -1;
	wakeup(&pmp->wthread_destroy);

	hammer2_mtx_unlock(&pmp->wthread_mtx);
}
/*
 * Wait for pending I/O to complete
 */
void
hammer2_bioq_sync(hammer2_pfs_t *pmp)
{
	struct bio sync_bio;

	bzero(&sync_bio, sizeof(sync_bio));	/* dummy with no bio_buf */
	hammer2_mtx_ex(&pmp->wthread_mtx);
	if (pmp->wthread_destroy == 0 &&
	    TAILQ_FIRST(&pmp->wthread_bioq.queue)) {
		bioq_insert_tail(&pmp->wthread_bioq, &sync_bio);
		while ((sync_bio.bio_flags & BIO_DONE) == 0)
			mtxsleep(&sync_bio, &pmp->wthread_mtx, 0, "h2bioq", 0);
	}
	hammer2_mtx_unlock(&pmp->wthread_mtx);
}
/*
 * Create a new cluster at (cparent, lbase) and assign physical storage,
 * returning a cluster suitable for I/O.  The cluster will be in a modified
 * state.
 *
 * cparent can wind up being anything.
 *
 * NOTE: Special case for data embedded in inode.
 */
static
hammer2_cluster_t *
hammer2_assign_physical(hammer2_trans_t *trans,
			hammer2_inode_t *ip, hammer2_cluster_t *cparent,
			hammer2_key_t lbase, int pblksize, int *errorp)
{
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *dparent;
	hammer2_key_t key_dummy;
	int pradix = hammer2_getradix(pblksize);

	/*
	 * Locate the chain associated with lbase, return a locked chain.
	 * However, do not instantiate any data reference (which utilizes a
	 * device buffer) because we will be using direct IO via the
	 * logical buffer cache buffer.
	 */
	*errorp = 0;
	KKASSERT(pblksize >= HAMMER2_ALLOC_MIN);
retry:
	dparent = hammer2_cluster_lookup_init(cparent, 0);
	cluster = hammer2_cluster_lookup(dparent, &key_dummy,
					 lbase, lbase,
					 HAMMER2_LOOKUP_NODATA);

	if (cluster == NULL) {
		/*
		 * We found a hole, create a new chain entry.
		 *
		 * NOTE: DATA chains are created without device backing
		 *	 store (nor do we want any).
		 */
		*errorp = hammer2_cluster_create(trans, dparent, &cluster,
					       lbase, HAMMER2_PBUFRADIX,
					       HAMMER2_BREF_TYPE_DATA,
					       pblksize, 0);
		if (cluster == NULL) {
			hammer2_cluster_lookup_done(dparent);
			panic("hammer2_cluster_create: par=%p error=%d\n",
				dparent->focus, *errorp);
			goto retry;
		}
		/*ip->delta_dcount += pblksize;*/
	} else {
		switch (hammer2_cluster_type(cluster)) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * The data is embedded in the inode, which requires
			 * a bit more finesse.
			 */
			hammer2_cluster_modify_ip(trans, ip, cluster, 0);
			break;
		case HAMMER2_BREF_TYPE_DATA:
			if (hammer2_cluster_need_resize(cluster, pblksize)) {
				hammer2_cluster_resize(trans, ip,
						       cparent, cluster,
						       pradix,
						       HAMMER2_MODIFY_OPTDATA);
			}

			/*
			 * DATA buffers must be marked modified whether the
			 * data is in a logical buffer or not.  We also have
			 * to make this call to fixup the chain data pointers
			 * after resizing in case this is an encrypted or
			 * compressed buffer.
			 */
			hammer2_cluster_modify(trans, cluster,
					       HAMMER2_MODIFY_OPTDATA);
			break;
		default:
			panic("hammer2_assign_physical: bad type");
			/* NOT REACHED */
			break;
		}
	}

	/*
	 * Cleanup.  If cluster wound up being the inode itself, i.e.
	 * the DIRECTDATA case for offset 0, then we need to update cparent.
	 * The caller expects cparent to not become stale.
	 */
	hammer2_cluster_lookup_done(dparent);
	/* dparent = NULL; safety */
	return (cluster);
}
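/*
 * NOTE: pradix above is the log2 radix of the physical block size, e.g.
 *	 hammer2_getradix(16384) == 14 since 2^14 == 16384; the resize
 *	 call uses it to change the chain's allocation to the new size.
 */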
/*
 * hammer2_write_file_core() - hammer2_write_thread() helper
 *
 * The core write function which determines which path to take
 * depending on compression settings.  We also have to locate the
 * related clusters so we can calculate and set the check data for
 * the blockref.
 */
static
void
hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
			hammer2_inode_t *ip,
			hammer2_cluster_t *cparent,
			hammer2_key_t lbase, int ioflag, int pblksize,
			int *errorp)
{
	hammer2_cluster_t *cluster;

	switch(HAMMER2_DEC_ALGO(ip->meta.comp_algo)) {
	case HAMMER2_COMP_NONE:
		/*
		 * We have to assign physical storage to the buffer
		 * we intend to dirty or write now to avoid deadlocks
		 * in the strategy code later.
		 *
		 * This can return NOOFFSET for inode-embedded data.
		 * The strategy code will take care of it in that case.
		 */
		cluster = hammer2_assign_physical(trans, ip, cparent,
						  lbase, pblksize,
						  errorp);
		if (cluster->ddflag) {
			hammer2_inode_data_t *wipdata;

			wipdata = hammer2_cluster_modify_ip(trans, ip,
							    cluster, 0);
			KKASSERT(wipdata->meta.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, wipdata->u.data,
			      HAMMER2_EMBEDDED_BYTES);
			hammer2_cluster_modsync(cluster);
		} else {
			hammer2_write_bp(cluster, bp, ioflag, pblksize,
					 errorp, ip->meta.check_algo);
		}
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
		break;
	case HAMMER2_COMP_AUTOZERO:
		/*
		 * Check for zero-fill only
		 */
		hammer2_zero_check_and_write(bp, trans, ip,
					     cparent, lbase,
					     ioflag, pblksize, errorp,
					     ip->meta.check_algo);
		break;
	case HAMMER2_COMP_LZ4:
	case HAMMER2_COMP_ZLIB:
	default:
		/*
		 * Check for zero-fill and attempt compression.
		 */
		hammer2_compress_and_write(bp, trans, ip,
					   cparent, lbase,
					   ioflag, pblksize, errorp,
					   ip->meta.comp_algo,
					   ip->meta.check_algo);
		break;
	}
}
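/*
 * NOTE: Dispatch summary for the switch above: COMP_NONE writes the
 *	 buffer as-is via hammer2_write_bp(), COMP_AUTOZERO only detects
 *	 all-zero blocks, and LZ4/ZLIB (also the default) zero-check and
 *	 then attempt compression.
 */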
/*
 * Generic function that will perform the compression in compression
 * write path. The compression algorithm is determined by the settings
 * obtained from inode.
 */
static
void
hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip,
	hammer2_cluster_t *cparent,
	hammer2_key_t lbase, int ioflag, int pblksize,
	int *errorp, int comp_algo, int check_algo)
{
	hammer2_cluster_t *cluster;
	hammer2_chain_t *chain;
	int comp_size;
	int comp_block_size;
	int i;
	char *comp_buffer;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, cparent, lbase, errorp);
		return;
	}

	comp_size = 0;
	comp_buffer = NULL;

	KKASSERT(pblksize / 2 <= 32768);

	if (ip->comp_heuristic < 8 || (ip->comp_heuristic & 7) == 0) {
		z_stream strm_compress;
		int comp_level;
		int ret;

		switch(HAMMER2_DEC_ALGO(comp_algo)) {
		case HAMMER2_COMP_LZ4:
			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			comp_size = LZ4_compress_limitedOutput(
					bp->b_data,
					&comp_buffer[sizeof(int)],
					pblksize,
					pblksize / 2 - sizeof(int));
			/*
			 * We need to prefix with the size, LZ4
			 * doesn't do it for us.  Add the related
			 * overhead.
			 */
			*(int *)comp_buffer = comp_size;
			if (comp_size)
				comp_size += sizeof(int);
			break;
		case HAMMER2_COMP_ZLIB:
			comp_level = HAMMER2_DEC_LEVEL(comp_algo);
			if (comp_level == 0)
				comp_level = 6;	/* default zlib compression */
			else if (comp_level < 6)
				comp_level = 6;
			else if (comp_level > 9)
				comp_level = 9;
			ret = deflateInit(&strm_compress, comp_level);
			if (ret != Z_OK) {
				kprintf("HAMMER2 ZLIB: fatal error "
					"on deflateInit.\n");
			}

			comp_buffer = objcache_get(cache_buffer_write,
						   M_INTWAIT);
			strm_compress.next_in = bp->b_data;
			strm_compress.avail_in = pblksize;
			strm_compress.next_out = comp_buffer;
			strm_compress.avail_out = pblksize / 2;
			ret = deflate(&strm_compress, Z_FINISH);
			if (ret == Z_STREAM_END) {
				comp_size = pblksize / 2 -
					    strm_compress.avail_out;
			} else {
				comp_size = 0;
			}
			ret = deflateEnd(&strm_compress);
			break;
		default:
			kprintf("Error: Unknown compression method.\n");
			kprintf("Comp_method = %d.\n", comp_algo);
			break;
		}
	}

	if (comp_size == 0) {
		/*
		 * compression failed or turned off
		 */
		comp_block_size = pblksize;	/* safety */
		if (++ip->comp_heuristic > 128)
			ip->comp_heuristic = 8;
	} else {
		/*
		 * compression succeeded
		 */
		ip->comp_heuristic = 0;
		if (comp_size <= 1024) {
			comp_block_size = 1024;
		} else if (comp_size <= 2048) {
			comp_block_size = 2048;
		} else if (comp_size <= 4096) {
			comp_block_size = 4096;
		} else if (comp_size <= 8192) {
			comp_block_size = 8192;
		} else if (comp_size <= 16384) {
			comp_block_size = 16384;
		} else if (comp_size <= 32768) {
			comp_block_size = 32768;
		} else {
			panic("hammer2: WRITE PATH: "
			      "Weird comp_size value.");
			/* NOT REACHED */
			comp_block_size = pblksize;
		}
	}
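
	/*
	 * NOTE: The rounding ladder above picks the smallest power-of-2
	 *	 physical block that fits the compressed result.  For
	 *	 example, a 65536-byte logical block that deflates to
	 *	 11000 bytes is stored in a 16384-byte physical block.
	 *	 The heuristic counter skips compression on most writes
	 *	 after repeated failures, retrying every 8th write once
	 *	 ip->comp_heuristic reaches 8.
	 */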
	cluster = hammer2_assign_physical(trans, ip, cparent,
					  lbase, comp_block_size,
					  errorp);
	if (*errorp) {
		kprintf("WRITE PATH: An error occurred while "
			"assigning physical space.\n");
		KKASSERT(cluster == NULL);
		goto done;
	}

	if (cluster->ddflag) {
		hammer2_inode_data_t *wipdata;

		wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
		KKASSERT(wipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA);
		KKASSERT(bp->b_loffset == 0);
		bcopy(bp->b_data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
		hammer2_cluster_modsync(cluster);
	} else
	for (i = 0; i < cluster->nchains; ++i) {
		hammer2_io_t *dio;
		char *bdata;

		if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
			continue;
		chain = cluster->array[i].chain;	/* XXX */
		if (chain == NULL)
			continue;
		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			panic("hammer2_write_bp: unexpected inode\n");
			break;
		case HAMMER2_BREF_TYPE_DATA:
			/*
			 * Optimize out the read-before-write
			 * if possible.
			 */
			*errorp = hammer2_io_newnz(chain->hmp,
						   chain->bref.data_off,
						   chain->bytes,
						   &dio);
			if (*errorp) {
				hammer2_io_brelse(&dio);
				kprintf("hammer2: WRITE PATH: "
					"dbp bread error\n");
				break;
			}
			bdata = hammer2_io_data(dio, chain->bref.data_off);

			/*
			 * When loading the block make sure we don't
			 * leave garbage after the compressed data.
			 */
			if (comp_size) {
				chain->bref.methods =
					HAMMER2_ENC_COMP(comp_algo) +
					HAMMER2_ENC_CHECK(check_algo);
				bcopy(comp_buffer, bdata, comp_size);
				if (comp_size != comp_block_size) {
					bzero(bdata + comp_size,
					      comp_block_size - comp_size);
				}
			} else {
				chain->bref.methods =
					HAMMER2_ENC_COMP(
						HAMMER2_COMP_NONE) +
					HAMMER2_ENC_CHECK(check_algo);
				bcopy(bp->b_data, bdata, pblksize);
			}

			/*
			 * The flush code doesn't calculate check codes for
			 * file data (doing so can result in excessive I/O),
			 * so we do it here.
			 */
			hammer2_chain_setcheck(chain, bdata);

			/*
			 * Device buffer is now valid, chain is no longer in
			 * the initial state.
			 *
			 * (No blockref table worries with file data)
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

			/* Now write the related bdp. */
			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				hammer2_io_bwrite(&dio);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				hammer2_io_bdwrite(&dio);
			*/
			} else if (ioflag & IO_ASYNC) {
				hammer2_io_bawrite(&dio);
			} else {
				hammer2_io_bdwrite(&dio);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
			      chain->bref.type);
			/* NOT REACHED */
			break;
		}
	}
done:
	if (cluster) {
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
	}
	if (comp_buffer)
		objcache_put(cache_buffer_write, comp_buffer);
}
/*
 * Function that performs zero-checking and writing without compression,
 * it corresponds to default zero-checking path.
 */
static
void
hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
	hammer2_inode_t *ip,
	hammer2_cluster_t *cparent,
	hammer2_key_t lbase, int ioflag, int pblksize, int *errorp,
	int check_algo)
{
	hammer2_cluster_t *cluster;

	if (test_block_zeros(bp->b_data, pblksize)) {
		zero_write(bp, trans, ip, cparent, lbase, errorp);
	} else {
		cluster = hammer2_assign_physical(trans, ip, cparent,
						  lbase, pblksize, errorp);
		hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp,
				 check_algo);
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
	}
}
/*
 * A function to test whether a block of data contains only zeros,
 * returns TRUE (non-zero) if the block is all zeros.
 */
static
int
test_block_zeros(const char *buf, size_t bytes)
{
	size_t i;

	for (i = 0; i < bytes; i += sizeof(long)) {
		if (*(const long *)(buf + i) != 0)
			return (0);
	}
	return (1);
}
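/*
 * NOTE: The scan above strides sizeof(long) bytes at a time, which is
 *	 safe because physical block sizes are powers of two of at least
 *	 HAMMER2_ALLOC_MIN and are therefore always a multiple of the
 *	 word size.
 */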
/*
 * Function to "write" a block that contains only zeros.
 */
static
void
zero_write(struct buf *bp, hammer2_trans_t *trans,
	   hammer2_inode_t *ip,
	   hammer2_cluster_t *cparent,
	   hammer2_key_t lbase, int *errorp __unused)
{
	hammer2_cluster_t *cluster;
	hammer2_key_t key_dummy;

	cparent = hammer2_cluster_lookup_init(cparent, 0);
	cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
					 HAMMER2_LOOKUP_NODATA);
	if (cluster) {
		if (cluster->ddflag) {
			hammer2_inode_data_t *wipdata;

			wipdata = hammer2_cluster_modify_ip(trans, ip,
							    cluster, 0);
			KKASSERT(wipdata->meta.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES);
			hammer2_cluster_modsync(cluster);
		} else {
			hammer2_cluster_delete(trans, cparent, cluster,
					       HAMMER2_DELETE_PERMANENT);
		}
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
	}
	hammer2_cluster_lookup_done(cparent);
}
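/*
 * NOTE: A zeroed block is represented by deleting its blockref outright
 *	 (HAMMER2_DELETE_PERMANENT) rather than writing zeros; the read
 *	 side then zero-fills via the cluster == NULL path in
 *	 hammer2_strategy_read().
 */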
/*
 * Function to write the data as it is, without performing any sort of
 * compression. This function is used in path without compression and
 * default zero-checking path.
 */
static
void
hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
		 int pblksize, int *errorp, int check_algo)
{
	hammer2_chain_t *chain;
	hammer2_inode_data_t *wipdata;
	hammer2_io_t *dio;
	char *bdata;
	int error;
	int i;

	error = 0;	/* XXX TODO below */

	for (i = 0; i < cluster->nchains; ++i) {
		if ((cluster->array[i].flags & HAMMER2_CITEM_FEMOD) == 0)
			continue;
		chain = cluster->array[i].chain;	/* XXX */
		if (chain == NULL)
			continue;
		KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);

		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			wipdata = &hammer2_chain_wdata(chain)->ipdata;
			KKASSERT(wipdata->meta.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			KKASSERT(bp->b_loffset == 0);
			bcopy(bp->b_data, wipdata->u.data,
			      HAMMER2_EMBEDDED_BYTES);
			error = 0;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			error = hammer2_io_newnz(chain->hmp,
						 chain->bref.data_off,
						 chain->bytes, &dio);
			if (error) {
				hammer2_io_bqrelse(&dio);
				kprintf("hammer2: WRITE PATH: "
					"dbp bread error\n");
				break;
			}
			bdata = hammer2_io_data(dio, chain->bref.data_off);

			chain->bref.methods = HAMMER2_ENC_COMP(
							HAMMER2_COMP_NONE) +
					      HAMMER2_ENC_CHECK(check_algo);
			bcopy(bp->b_data, bdata, chain->bytes);

			/*
			 * The flush code doesn't calculate check codes for
			 * file data (doing so can result in excessive I/O),
			 * so we do it here.
			 */
			hammer2_chain_setcheck(chain, bdata);

			/*
			 * Device buffer is now valid, chain is no longer in
			 * the initial state.
			 *
			 * (No blockref table worries with file data)
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);

			if (ioflag & IO_SYNC) {
				/*
				 * Synchronous I/O requested.
				 */
				hammer2_io_bwrite(&dio);
			/*
			} else if ((ioflag & IO_DIRECT) &&
				   loff + n == pblksize) {
				hammer2_io_bdwrite(&dio);
			*/
			} else if (ioflag & IO_ASYNC) {
				hammer2_io_bawrite(&dio);
			} else {
				hammer2_io_bdwrite(&dio);
			}
			break;
		default:
			panic("hammer2_write_bp: bad chain type %d\n",
			      chain->bref.type);
			/* NOT REACHED */
			break;
		}
	}
	KKASSERT(error == 0);	/* XXX TODO */
	*errorp = error;
}