dcesrv_unixinfo: No wbc_context required
[Samba.git] / ctdb / tests / simple / 77_ctdb_db_recovery.sh
blob6dbc0cf639cf8b274622706ea70f3944eb2d5d74
#!/bin/bash

# Print the human-readable description of this test case (background on
# the bug being tested, the steps performed, and the expected result) to
# stdout.
# NOTE(review): nothing in this file calls test_info; presumably the test
# harness invokes it — confirm.
test_info()
{
    cat <<EOF
Recovery can under certain circumstances lead to old record copies
resurrecting: Recovery selects the newest record copy purely by RSN. At
the end of the recovery, the recovery master is the dmaster for all
records in all (non-persistent) databases. And the other nodes locally
hold the complete copy of the databases. The bug is that the recovery
process does not increment the RSN on the recovery master at the end of
the recovery. Now clients acting directly on the Recovery master will
directly change a record's content on the recmaster without migration
and hence without RSN bump. So a subsequent recovery can not tell that
the recmaster's copy is newer than the copies on the other nodes, since
their RSN is the same. Hence, if the recmaster is not node 0 (or more
precisely not the active node with the lowest node number), the recovery
will choose copies from nodes with lower number and stick to these.

Steps:

1. Create a test database
2. Add a record with value value1 on recovery master
3. Force a recovery
4. Update the record with value value2 on recovery master
5. Force a recovery
6. Fetch the record

Expected results:

* The record should have value value2 and not value1

EOF
}
# Load the shared integration-test helpers; TEST_SCRIPTS_DIR is expected
# to be provided by the environment that runs this test.
. "${TEST_SCRIPTS_DIR}/integration.bash"

# Hand the script's own arguments to the helper library's initialiser.
ctdb_test_init "$@"

# From here on, abort the test on the first command that fails.
set -e

cluster_is_healthy

# Reset configuration
ctdb_restart_when_done
#
# Main test
#
# Flow: make sure the recovery master is not node 0, store value1 on the
# recovery master, force a recovery, overwrite the record with value2,
# force a second recovery, then check that value2 is what is read back.
#
# NOTE(review): try_command_on_node and wait_until_node_has_status are not
# defined in this file (presumably they come from the sourced
# integration.bash). This script reads the output of the last
# try_command_on_node call from $out.
# $CTDB is left unquoted exactly as before, in case it carries extra
# arguments — TODO confirm.
TESTDB="rec_test.tdb"

# Exit status of the test; set to 1 only if the final check fails.
status=0

# Make sure node 0 is not the recovery master
echo "find out which node is recmaster"
try_command_on_node any $CTDB recmaster
recmaster="$out"
if [ "$recmaster" = "0" ]; then
    echo "node 0 is recmaster, disable recmasterrole on node 0"
    #
    # Note:
    # It should be sufficient to run "ctdb setrecmasterrole off"
    # on node 0 and wait for election and recovery to finish.
    # But there were problems related to this in this automatic
    # test, so for now use "ctdb stop" and "ctdb continue".
    #
    echo "stop node 0"
    try_command_on_node 0 $CTDB stop
    wait_until_node_has_status 0 stopped
    echo "continue node 0"
    try_command_on_node 0 $CTDB continue
    wait_until_node_has_status 0 notstopped

    # Re-query: the test is only meaningful if the recmaster moved away
    # from node 0.
    try_command_on_node any $CTDB recmaster
    recmaster="$out"
    if [ "$recmaster" = "0" ]; then
        echo "failed to move recmaster to different node"
        exit 1
    fi
fi

echo "Recmaster:$recmaster"

# Create a temporary non-persistent database to test with
echo "create test database $TESTDB"
try_command_on_node "$recmaster" $CTDB attach "$TESTDB"

# Wipe Test database
echo "wipe test database"
try_command_on_node "$recmaster" $CTDB wipedb "$TESTDB"

# Add a record key=test1 data=value1
echo "store key(test1) data(value1)"
try_command_on_node "$recmaster" $CTDB writekey "$TESTDB" test1 value1

# Fetch a record key=test1
echo "read key(test1)"
try_command_on_node "$recmaster" $CTDB readkey "$TESTDB" test1
echo "$out"

# Do a recovery
echo "force recovery"
try_command_on_node "$recmaster" $CTDB recover

wait_until_node_has_status "$recmaster" recovered

# Add a record key=test1 data=value2
echo "store key(test1) data(value2)"
try_command_on_node "$recmaster" $CTDB writekey "$TESTDB" test1 value2

# Fetch a record key=test1
echo "read key(test1)"
try_command_on_node "$recmaster" $CTDB readkey "$TESTDB" test1
echo "$out"

# Do a recovery
echo "force recovery"
try_command_on_node "$recmaster" $CTDB recover

wait_until_node_has_status "$recmaster" recovered

# Verify record key=test1
echo "read key(test1)"
try_command_on_node "$recmaster" $CTDB readkey "$TESTDB" test1
echo "$out"
# The comparison is against the exact readkey output line for value2.
if [ "$out" = "Data: size:6 ptr:[value2]" ]; then
    echo "GOOD: Recovery did not corrupt database"
else
    echo "BAD: Recovery corrupted database"
    status=1
fi

exit $status