Commit 015fb8b

Fix double spares for failed vdev
It's possible for two spares to get attached to a single failed vdev.  This
happens when you have a failed disk that is spared, and then you replace the
failed disk with a new disk, but during the resilver the new disk fails, and
ZED kicks in a spare for the failed new disk.  This commit checks for that
condition and disallows it.

Closes: #16547
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
1 parent 8d14897 commit 015fb8b
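
The failure sequence described above can be reproduced by hand roughly as
follows. This is only a sketch: the pool name, device paths, and spare names
are hypothetical, and the steps mirror the new auto_spare_double test case
added below.

    # Assumed setup: a mirror pool with two hot spares (names hypothetical).
    zpool create tank mirror /dev/sdb /dev/sdc spare /dev/sdd /dev/sde

    # 1. disk1 fails; the first spare is kicked in and resilvers.
    zpool offline -f tank /dev/sdb
    zpool replace tank /dev/sdb /dev/sdd

    # 2. The failed disk is swapped for a new blank disk, which starts
    #    resilvering, then faults partway through the resilver.
    zpool replace tank /dev/sdb
    zpool offline -f tank /dev/sdb

    # 3. Kicking in the second spare for the faulted new disk is what used
    #    to create the double-spare layout; with this commit it is rejected.
    zpool replace tank /dev/sdb /dev/sde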

File tree

4 files changed: +209 -4 lines changed

module/zfs/spa.c

+82
@@ -7430,6 +7430,82 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot, boolean_t check_ashift)
 	return (0);
 }
 
+/*
+ * Given a vdev to be replaced and its parent, check for a possible
+ * "double spare" condition if a vdev is to be replaced by a spare.  When this
+ * happens, you can get two spares assigned to one failed vdev.
+ *
+ * To trigger a double spare condition:
+ *
+ * 1. disk1 fails
+ * 2. 1st spare is kicked in for disk1 and it resilvers
+ * 3. Someone replaces disk1 with a new blank disk
+ * 4. New blank disk starts resilvering
+ * 5. While resilvering, new blank disk has IO errors and faults
+ * 6. 2nd spare is kicked in for new blank disk
+ * 7. At this point two spares are kicked in for the original disk1.
+ *
+ * It looks like this:
+ *
+ * NAME                                            STATE     READ WRITE CKSUM
+ * tank2                                           DEGRADED     0     0     0
+ *   draid2:6d:10c:2s-0                            DEGRADED     0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d1                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d2                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d3                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d4                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d5                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d6                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d7                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d8                 ONLINE       0     0     0
+ *     scsi-0QEMU_QEMU_HARDDISK_d9                 ONLINE       0     0     0
+ *     spare-9                                     DEGRADED     0     0     0
+ *       replacing-0                               DEGRADED     0    93     0
+ *         scsi-0QEMU_QEMU_HARDDISK_d10-part1/old  UNAVAIL      0     0     0
+ *         spare-1                                 DEGRADED     0     0     0
+ *           scsi-0QEMU_QEMU_HARDDISK_d10          REMOVED      0     0     0
+ *           draid2-0-0                            ONLINE       0     0     0
+ *       draid2-0-1                                ONLINE       0     0     0
+ * spares
+ *   draid2-0-0                                    INUSE     currently in use
+ *   draid2-0-1                                    INUSE     currently in use
+ *
+ * ARGS:
+ *
+ * newvd: New spare disk
+ * pvd:   Parent vdev_t the spare should attach to
+ *
+ * This function returns B_TRUE if adding the new vdev would create a double
+ * spare condition, B_FALSE otherwise.
+ */
+static boolean_t
+spa_vdev_new_spare_would_cause_double_spares(vdev_t *newvd, vdev_t *pvd)
+{
+	vdev_t *ppvd;
+
+	ppvd = pvd->vdev_parent;
+	if (ppvd == NULL)
+		return (B_FALSE);
+
+	/*
+	 * To determine if this configuration would cause a double spare, we
+	 * look at the vdev_op of the parent vdev, and of the parent's parent
+	 * vdev.  We also look at vdev_isspare on the new disk.  A double spare
+	 * condition looks like this:
+	 *
+	 * 1. parent of parent's op is a spare or draid spare
+	 * 2. parent's op is replacing
+	 * 3. new disk is a spare
+	 */
+	if ((ppvd->vdev_ops == &vdev_spare_ops) ||
+	    (ppvd->vdev_ops == &vdev_draid_spare_ops))
+		if (pvd->vdev_ops == &vdev_replacing_ops)
+			if (newvd->vdev_isspare)
+				return (B_TRUE);
+
+	return (B_FALSE);
+}
+
 /*
  * Attach a device to a vdev specified by its guid.  The vdev type can be
  * a mirror, a raidz, or a leaf device that is also a top-level (e.g. a
@@ -7604,6 +7680,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
 	}
 
+	if (spa_vdev_new_spare_would_cause_double_spares(newvd, pvd)) {
+		vdev_dbgmsg(newvd,
+		    "disk would create double spares, ignore.");
+		return (spa_vdev_exit(spa, newrootvd, txg, EEXIST));
+	}
+
 	if (newvd->vdev_isspare)
 		pvops = &vdev_spare_ops;
 	else
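
With this check in place, spa_vdev_attach() refuses to attach a second spare
to an already-spared vdev and exits with EEXIST, so the corresponding zpool
attach/replace attempt fails. The rejection is also recorded via
vdev_dbgmsg(); on Linux the new test confirms it by grepping the debug log,
roughly like this:

    grep 'disk would create double spares' /proc/spl/kstat/zfs/dbgmsg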

tests/runfiles/linux.run

+4 -4
@@ -123,10 +123,10 @@ tags = ['functional', 'fallocate']
 [tests/functional/fault:Linux]
 tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
-    'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
-    'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
-    'fault_limits', 'scrub_after_resilver', 'suspend_on_probe_errors',
-    'suspend_resume_single', 'zpool_status_-s']
+    'auto_spare_002_pos', 'auto_spare_double', 'auto_spare_multiple',
+    'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
+    'decompress_fault', 'fault_limits', 'scrub_after_resilver',
+    'suspend_on_probe_errors', 'suspend_resume_single', 'zpool_status_-s']
 tags = ['functional', 'fault']
 
 [tests/functional/features/large_dnode:Linux]

tests/zfs-tests/tests/Makefile.am

+1
@@ -1535,6 +1535,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/fault/auto_spare_001_pos.ksh \
 	functional/fault/auto_spare_002_pos.ksh \
 	functional/fault/auto_spare_ashift.ksh \
+	functional/fault/auto_spare_double.ksh \
 	functional/fault/auto_spare_multiple.ksh \
 	functional/fault/auto_spare_shared.ksh \
 	functional/fault/cleanup.ksh \
tests/zfs-tests/tests/functional/fault/auto_spare_double.ksh (new file)

+122
@@ -0,0 +1,122 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
+#
+
+# DESCRIPTION:
+# Try to induce a double spare condition and verify we're prevented from
+# doing it.
+#
+# STRATEGY:
+# 1. Fail a drive
+# 2. Kick in first spare
+# 3. Bring new drive back online and start resilvering to it
+# 4. Immediately after the resilver starts, fail the new drive.
+# 5. Try to kick in the second spare for the new drive (which is now failed)
+# 6. Verify that we can't kick in the second spare
+#
+# Repeat this test for both traditional spares and dRAID spares.
+#
+. $STF_SUITE/include/libtest.shlib
+
+LAST_VDEV=3
+SIZE_MB=300
+
+ZED_PID="$(zed_check)"
+
+function cleanup
+{
+	destroy_pool $TESTPOOL
+	for i in {0..$LAST_VDEV} ; do
+		log_must rm -f $TEST_BASE_DIR/file$i
+	done
+
+	# Restore ZED if it was running before this test
+	if [ -n "$ZED_PID" ] ; then
+		log_must zed_start
+	fi
+}
+
+log_assert "Cannot attach two spares to same failed vdev"
+log_onexit cleanup
+
+# Stop ZED if it's running
+if [ -n "$ZED_PID" ] ; then
+	log_must zed_stop
+fi
+
+log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV}
+
+ZFS_DBGMSG=/proc/spl/kstat/zfs/dbgmsg
+
+# Run the test - we assume the pool is already created.
+# $1: disk to fail
+# $2: 1st spare name
+# $3: 2nd spare name
+function do_test {
+	FAIL_DRIVE=$1
+	SPARE0=$2
+	SPARE1=$3
+	echo 0 > $ZFS_DBGMSG
+	log_must zpool status
+
+	log_note "Kicking in first spare ($SPARE0)"
+	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE
+	log_must zpool replace $TESTPOOL $FAIL_DRIVE $SPARE0
+
+	# Fill the pool with data to make the resilver take a little
+	# time.
+	dd if=/dev/zero of=/$TESTPOOL/testfile bs=1M || true
+
+	# Zero our failed disk.  It will appear as a blank disk.
+	rm -f $FAIL_DRIVE
+	truncate -s ${SIZE_MB}M $FAIL_DRIVE
+
+	# Attempt to replace our failed disk, then immediately fault it.
+	log_must zpool replace $TESTPOOL $FAIL_DRIVE
+	log_must zpool offline -f $TESTPOOL $FAIL_DRIVE
+	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted"
+
+	log_note "Kicking in second spare ($SPARE1)... This should not work..."
+	log_mustnot zpool replace $TESTPOOL $FAIL_DRIVE $SPARE1
+	# Verify the debug line in dbgmsg
+	log_must grep 'disk would create double spares' $ZFS_DBGMSG
+
+	# Disk should still be faulted
+	log_must check_state $TESTPOOL $FAIL_DRIVE "faulted"
+}
+
+# Test with traditional spares
+log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \
+	$TESTPOOL mirror $TEST_BASE_DIR/file{0,1} spare $TEST_BASE_DIR/file{2,3}
+do_test $TEST_BASE_DIR/file1 $TEST_BASE_DIR/file2 $TEST_BASE_DIR/file3
+destroy_pool $TESTPOOL
+
+# Clear vdev files for next test
+for i in {0..$LAST_VDEV} ; do
+	log_must rm -f $TEST_BASE_DIR/file$i
+done
+log_must truncate -s ${SIZE_MB}M $TEST_BASE_DIR/file{0..$LAST_VDEV}
+
+# Test with dRAID spares
+log_must zpool create -O compression=off -O recordsize=4k -O primarycache=none \
+	$TESTPOOL draid1:1d:4c:2s $TEST_BASE_DIR/file{0..3}
+do_test $TEST_BASE_DIR/file1 draid1-0-0 draid1-0-1
+
+log_pass "Verified we cannot attach two spares to same failed vdev"
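
The new case is wired into tests/runfiles/linux.run and Makefile.am above, so
it runs with the rest of the functional/fault group. To run just this test
with the in-tree test runner, something along the following lines should work
(the exact option spelling may vary between ZFS test-suite versions):

    ./scripts/zfs-tests.sh -t functional/fault/auto_spare_double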
