From 286ef71398fb54b1d5007d6f45aa4320a9e0ede2 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 9 Mar 2016 09:12:17 -0800 Subject: [PATCH] 6370 ZFS send fails to transmit some holes Reviewed by: Matthew Ahrens Reviewed by: Chris Williamson Reviewed by: Stefan Ring Reviewed by: Steven Burgess Reviewed by: Arne Jansen Approved by: Robert Mustacchi --- usr/src/pkg/manifests/system-test-zfstest.mf | 3 + usr/src/test/zfs-tests/cmd/Makefile | 1 + usr/src/test/zfs-tests/cmd/{ => mkfiles}/Makefile | 27 ++---- usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c | 63 ++++++++++++++ usr/src/test/zfs-tests/include/default.cfg | 3 +- usr/src/test/zfs-tests/runfiles/delphix.run | 3 +- .../cli_root/zfs_receive/zfs_receive_010_pos.ksh | 5 ++ .../tests/functional/cli_root/zfs_send/Makefile | 5 +- .../cli_root/zfs_send/zfs_send_007_pos.ksh | 99 ++++++++++++++++++++++ usr/src/uts/common/fs/zfs/dmu_object.c | 8 +- usr/src/uts/common/fs/zfs/dmu_traverse.c | 49 ++++++++--- 11 files changed, 228 insertions(+), 38 deletions(-) copy usr/src/test/zfs-tests/cmd/{ => mkfiles}/Makefile (53%) create mode 100644 usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index cb1016e921..457fe05e29 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -152,6 +152,7 @@ file path=opt/zfs-tests/bin/file_write mode=0555 file path=opt/zfs-tests/bin/getholes mode=0555 file path=opt/zfs-tests/bin/largest_file mode=0555 file path=opt/zfs-tests/bin/mkbusy mode=0555 +file path=opt/zfs-tests/bin/mkfiles mode=0555 file path=opt/zfs-tests/bin/mkholes mode=0555 file path=opt/zfs-tests/bin/mktree mode=0555 file path=opt/zfs-tests/bin/mmapwrite mode=0555 @@ -794,6 +795,8 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_005_pos \ mode=0555 file 
path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_001_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg \ diff --git a/usr/src/test/zfs-tests/cmd/Makefile b/usr/src/test/zfs-tests/cmd/Makefile index 031b8ff1f6..5bc6db13c3 100644 --- a/usr/src/test/zfs-tests/cmd/Makefile +++ b/usr/src/test/zfs-tests/cmd/Makefile @@ -24,6 +24,7 @@ SUBDIRS = chg_usr_exec \ getholes \ largest_file \ mkbusy \ + mkfiles \ mkholes \ mktree \ mmapwrite \ diff --git a/usr/src/test/zfs-tests/cmd/Makefile b/usr/src/test/zfs-tests/cmd/mkfiles/Makefile similarity index 53% copy from usr/src/test/zfs-tests/cmd/Makefile copy to usr/src/test/zfs-tests/cmd/mkfiles/Makefile index 031b8ff1f6..7e833fefb1 100644 --- a/usr/src/test/zfs-tests/cmd/Makefile +++ b/usr/src/test/zfs-tests/cmd/mkfiles/Makefile @@ -10,27 +10,14 @@ # # -# Copyright (c) 2012, 2014 by Delphix. All rights reserved. +# Copyright (c) 2016 by Delphix. All rights reserved. # -.PARALLEL: $(SUBDIRS) +PROG = mkfiles -SUBDIRS = chg_usr_exec \ - devname2devid \ - dir_rd_update \ - file_check \ - file_trunc \ - file_write \ - getholes \ - largest_file \ - mkbusy \ - mkholes \ - mktree \ - mmapwrite \ - randfree_file \ - readmmap \ - rename_dir \ - rm_lnkcnt_zero_file \ - scripts +include $(SRC)/cmd/Makefile.cmd -include $(SRC)/test/Makefile.com +LDLIBS += -lc +C99MODE = -xc99=%all + +include ../Makefile.subdirs diff --git a/usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c b/usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c new file mode 100644 index 0000000000..58c7d5f509 --- /dev/null +++ b/usr/src/test/zfs-tests/cmd/mkfiles/mkfiles.c @@ -0,0 +1,63 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2016 by Delphix. All rights reserved. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/param.h> + +#define MAX_INT_LENGTH 10 + +static void +usage(char *msg, int exit_value) +{ + (void) fprintf(stderr, "mkfiles basename max_file [min_file]\n"); + (void) fprintf(stderr, "%s\n", msg); + exit(exit_value); +} + +int +main(int argc, char **argv) +{ + unsigned int numfiles = 0; + unsigned int first_file = 0; + if (argc < 3 || argc > 4) + usage("Invalid number of arguments", -1); + + if (sscanf(argv[2], "%u", &numfiles) != 1) + usage("Invalid maximum file", -2); + + if (argc == 4 && sscanf(argv[3], "%u", &first_file) != 1) + usage("Invalid first file", -3); + + if (numfiles < first_file) + usage("First file larger than last file", -3); + + char buf[MAXPATHLEN]; + for (unsigned int i = first_file; i <= numfiles; i++) { + int fd; + (void) snprintf(buf, MAXPATHLEN, "%s%u", argv[1], i); + if ((fd = open(buf, O_CREAT | O_EXCL | O_RDWR, 0644)) == -1) { + (void) fprintf(stderr, "Failed to create %s %s\n", buf, + strerror(errno)); + return (-4); + } + (void) close(fd); + } + return (0); +} diff --git a/usr/src/test/zfs-tests/include/default.cfg b/usr/src/test/zfs-tests/include/default.cfg index fa57c978a7..61fb25e628 100644 --- a/usr/src/test/zfs-tests/include/default.cfg +++ b/usr/src/test/zfs-tests/include/default.cfg @@ -25,7 +25,7 @@ # # -# Copyright (c) 2012, 2014 by Delphix. All rights reserved. +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. # .
$STF_SUITE/include/commands.cfg @@ -50,6 +50,7 @@ export FILE_WRITE="/opt/zfs-tests/bin/file_write" export GETHOLES="/opt/zfs-tests/bin/getholes" export LARGEST_FILE="/opt/zfs-tests/bin/largest_file" export MKBUSY="/opt/zfs-tests/bin/mkbusy" +export MKFILES="/opt/zfs-tests/bin/mkfiles" export MKHOLES="/opt/zfs-tests/bin/mkholes" export MKTREE="/opt/zfs-tests/bin/mktree" export MMAPWRITE="/opt/zfs-tests/bin/mmapwrite" diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index bc6c80d491..bcd19ae686 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -168,7 +168,8 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', - 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos'] + 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', + 'zfs_send_007_pos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh index f9c1ec4a5f..b1fbff2976 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_010_pos.ksh @@ -137,6 +137,11 @@ $TOUCH $mntpnt2/f18 $RM $mntpnt/h17 $RM $mntpnt2/h* +# Add empty objects to $fs to exercise dmu_traverse code +for i in `seq 1 100`; do + log_must $TOUCH $mntpnt/uf$i +done + log_must $ZFS snapshot $fs@s1 log_must $ZFS snapshot $fs2@s1 diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile index e8d5a70bf6..2647f8bbe4 100644 ---
a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/Makefile @@ -10,7 +10,7 @@ # # -# Copyright (c) 2012 by Delphix. All rights reserved. +# Copyright (c) 2012, 2015 by Delphix. All rights reserved. # include $(SRC)/Makefile.master @@ -25,7 +25,8 @@ PROGS = cleanup \ zfs_send_003_pos \ zfs_send_004_neg \ zfs_send_005_pos \ - zfs_send_006_pos + zfs_send_006_pos \ + zfs_send_007_pos FILES = zfs_send.cfg diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh new file mode 100644 index 0000000000..13ae4f0248 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2015, 2016 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +# +# DESCRIPTION: +# Verify 'zfs send' drills holes appropriately when files are replaced +# +# STRATEGY: +# 1. Create dataset +# 2. Write block 0 in a bunch of files +# 3. Snapshot the dataset +# 4. Remove all the files and rewrite some files with just block 1 +# 5. Snapshot the dataset +# 6. Send both snapshots and receive them locally +# 7. diff the received dataset and the old datasets. +# 8. Repeat steps 1-7 above with pool that never had hole birth enabled. 
+# +verify_runnable "both" + +function cleanup +{ + $ZFS destroy -rf $TESTPOOL/fs + $ZFS destroy -rf $TESTPOOL/recvfs + $RM $streamfile + $RM $vdev + $ZPOOL destroy testpool +} + + +log_assert "Verify that 'zfs send' drills appropriate holes" +log_onexit cleanup +streamfile=$(mktemp /var/tmp/file.XXXXXX) +vdev=$(mktemp /var/tmp/file.XXXXXX) + + +test_pool () +{ + POOL=$1 + log_must $ZFS create -o recordsize=512 $POOL/fs + mntpnt=$(get_prop mountpoint "$POOL/fs") + log_must $DD if=/dev/urandom of=${mntpnt}/file bs=512 count=1 2>/dev/null + first_object=$(ls -i $mntpnt | awk '{print $1}') + log_must $ZFS snapshot $POOL/fs@a + while true; do + log_must $FIND $mntpnt -delete + sync + log_must $MKFILES "$mntpnt/" 4000 + FILE=$(ls -i $mntpnt | awk \ + '{if ($1 == '$first_object') {print $2}}') + if [[ -n "$FILE" ]]; then + break + fi + done + log_must $DD if=/dev/urandom of=${mntpnt}/$FILE bs=512 count=1 seek=1 2>/dev/null + + log_must $ZFS snapshot $POOL/fs@b + + log_must eval "$ZFS send $POOL/fs@a > $streamfile" + $CAT $streamfile | log_must $ZFS receive $POOL/recvfs + + log_must eval "$ZFS send -i @a $POOL/fs@b > $streamfile" + $CAT $streamfile | log_must $ZFS receive $POOL/recvfs + + recv_mntpnt=$(get_prop mountpoint "$POOL/recvfs") + log_must $DIFF -r $mntpnt $recv_mntpnt + log_must $ZFS destroy -rf $POOL/fs + log_must $ZFS destroy -rf $POOL/recvfs +} + +test_pool $TESTPOOL +log_must $TRUNCATE --size=1G $vdev +log_must $ZPOOL create -o version=1 testpool $vdev +test_pool testpool +log_must $ZPOOL destroy testpool +log_must $ZPOOL create -d testpool $vdev +test_pool testpool +log_must $ZPOOL destroy testpool + + +log_pass "'zfs send' drills appropriate holes" diff --git a/usr/src/uts/common/fs/zfs/dmu_object.c b/usr/src/uts/common/fs/zfs/dmu_object.c index 6ca021eecb..2c9802f51e 100644 --- a/usr/src/uts/common/fs/zfs/dmu_object.c +++ b/usr/src/uts/common/fs/zfs/dmu_object.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates.
All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. */ @@ -50,6 +50,12 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, * reasonably sparse (at most 1/4 full). Look from the * beginning once, but after that keep looking from here. * If we can't find one, just keep going from here. + * + * Note that dmu_traverse depends on the behavior that we use + * multiple blocks of the dnode object before going back to + * reuse objects. Any change to this algorithm should preserve + * that property or find another solution to the issues + * described in traverse_visitbp. */ if (P2PHASE(object, L2_dnode_count) == 0) { uint64_t offset = restarted ? object << DNODE_SHIFT : 0; diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c index e8739eddaf..2822ca4525 100644 --- a/usr/src/uts/common/fs/zfs/dmu_traverse.c +++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2016 by Delphix. All rights reserved. */ #include @@ -62,6 +62,7 @@ typedef struct traverse_data { uint64_t td_hole_birth_enabled_txg; blkptr_cb_t *td_func; void *td_arg; + boolean_t td_realloc_possible; } traverse_data_t; static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, @@ -231,18 +232,30 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, if (bp->blk_birth == 0) { /* - * Since this block has a birth time of 0 it must be a - * hole created before the SPA_FEATURE_HOLE_BIRTH - * feature was enabled. If SPA_FEATURE_HOLE_BIRTH - * was enabled before the min_txg for this traveral we - * know the hole must have been created before the - * min_txg for this traveral, so we can skip it. 
If - * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg - * for this traveral we cannot tell if the hole was - * created before or after the min_txg for this - * traversal, so we cannot skip it. + * Since this block has a birth time of 0 it must be one of + * two things: a hole created before the + * SPA_FEATURE_HOLE_BIRTH feature was enabled, or a hole + * which has always been a hole in an object. + * + * If a file is written sparsely, then the unwritten parts of + * the file were "always holes" -- that is, they have been + * holes since this object was allocated. However, we (and + * our callers) can not necessarily tell when an object was + * allocated. Therefore, if it's possible that this object + * was freed and then its object number reused, we need to + * visit all the holes with birth==0. + * + * If it isn't possible that the object number was reused, + * then if SPA_FEATURE_HOLE_BIRTH was enabled before we wrote + * all the blocks we will visit as part of this traversal, + * then this hole must have always existed, so we can skip + * it. We visit blocks born after (exclusive) td_min_txg. + * + * Note that the meta-dnode cannot be reallocated. */ - if (td->td_hole_birth_enabled_txg < td->td_min_txg) + if ((!td->td_realloc_possible || + zb->zb_object == DMU_META_DNODE_OBJECT) && + td->td_hole_birth_enabled_txg <= td->td_min_txg) return (0); } else if (bp->blk_birth <= td->td_min_txg) { return (0); @@ -337,6 +350,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, objset_phys_t *osp = buf->b_data; prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); + /* + * See the block comment above for the goal of this variable. + * If the maxblkid of the meta-dnode is 0, then we know that + * we've never had more than DNODES_PER_BLOCK objects in the + * dataset, which means we can't have reused any object ids. 
+ */ + if (osp->os_meta_dnode.dn_maxblkid == 0) + td->td_realloc_possible = B_FALSE; + if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { prefetch_dnode_metadata(td, &osp->os_groupused_dnode, zb->zb_objset, DMU_GROUPUSED_OBJECT); @@ -543,12 +565,13 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, td.td_pfd = &pd; td.td_flags = flags; td.td_paused = B_FALSE; + td.td_realloc_possible = (txg_start == 0 ? B_FALSE : B_TRUE); if (spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) { VERIFY(spa_feature_enabled_txg(spa, SPA_FEATURE_HOLE_BIRTH, &td.td_hole_birth_enabled_txg)); } else { - td.td_hole_birth_enabled_txg = 0; + td.td_hole_birth_enabled_txg = UINT64_MAX; } pd.pd_flags = flags; -- 2.11.4.GIT