2 implementation of the update record control
4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/time.h"
28 #include "lib/tdb_wrap/tdb_wrap.h"
29 #include "lib/util/debug.h"
30 #include "lib/util/samba_util.h"
31 #include "lib/util/sys_rw.h"
32 #include "lib/util/util_process.h"
34 #include "ctdb_private.h"
35 #include "ctdb_client.h"
37 #include "common/system.h"
38 #include "common/common.h"
39 #include "common/logging.h"
41 struct ctdb_persistent_write_state
{
42 struct ctdb_db_context
*ctdb_db
;
43 struct ctdb_marshall_buffer
*m
;
44 struct ctdb_req_control_old
*c
;
48 /* don't create/update records that does not exist locally */
49 #define UPDATE_FLAGS_REPLACE_ONLY 1
52 called from a child process to write the data
54 static int ctdb_persistent_store(struct ctdb_persistent_write_state
*state
)
57 struct ctdb_rec_data_old
*rec
= NULL
;
58 struct ctdb_marshall_buffer
*m
= state
->m
;
60 ret
= tdb_transaction_start(state
->ctdb_db
->ltdb
->tdb
);
62 DEBUG(DEBUG_ERR
,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
63 state
->ctdb_db
->db_id
));
67 for (i
=0;i
<m
->count
;i
++) {
68 struct ctdb_ltdb_header oldheader
;
69 struct ctdb_ltdb_header header
;
70 TDB_DATA key
, data
, olddata
;
71 TALLOC_CTX
*tmp_ctx
= talloc_new(state
);
73 rec
= ctdb_marshall_loop_next(m
, rec
, NULL
, &header
, &key
, &data
);
76 DEBUG(DEBUG_ERR
,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
77 i
, state
->ctdb_db
->db_id
));
82 /* we must check if the record exists or not because
83 ctdb_ltdb_fetch will unconditionally create a record
85 if (state
->flags
& UPDATE_FLAGS_REPLACE_ONLY
) {
87 trec
= tdb_fetch(state
->ctdb_db
->ltdb
->tdb
, key
);
88 if (trec
.dsize
== 0) {
95 /* fetch the old header and ensure the rsn is less than the new rsn */
96 ret
= ctdb_ltdb_fetch(state
->ctdb_db
, key
, &oldheader
, tmp_ctx
, &olddata
);
98 DEBUG(DEBUG_ERR
,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
99 state
->ctdb_db
->db_id
));
100 talloc_free(tmp_ctx
);
104 if (oldheader
.rsn
>= header
.rsn
&&
105 (olddata
.dsize
!= data
.dsize
||
106 memcmp(olddata
.dptr
, data
.dptr
, data
.dsize
) != 0)) {
107 DEBUG(DEBUG_CRIT
,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
108 state
->ctdb_db
->db_id
,
109 (unsigned long long)oldheader
.rsn
, (unsigned long long)header
.rsn
));
110 talloc_free(tmp_ctx
);
114 talloc_free(tmp_ctx
);
116 ret
= ctdb_ltdb_store(state
->ctdb_db
, key
, &header
, data
);
118 DEBUG(DEBUG_CRIT
,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
119 state
->ctdb_db
->db_id
));
124 ret
= tdb_transaction_commit(state
->ctdb_db
->ltdb
->tdb
);
126 DEBUG(DEBUG_ERR
,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
127 state
->ctdb_db
->db_id
));
134 tdb_transaction_cancel(state
->ctdb_db
->ltdb
->tdb
);
140 called when we the child has completed the persistent write
143 static void ctdb_persistent_write_callback(int status
, void *private_data
)
145 struct ctdb_persistent_write_state
*state
= talloc_get_type(private_data
,
146 struct ctdb_persistent_write_state
);
149 ctdb_request_control_reply(state
->ctdb_db
->ctdb
, state
->c
, NULL
, status
, NULL
);
155 called if our lockwait child times out
157 static void ctdb_persistent_lock_timeout(struct tevent_context
*ev
,
158 struct tevent_timer
*te
,
159 struct timeval t
, void *private_data
)
161 struct ctdb_persistent_write_state
*state
= talloc_get_type(private_data
,
162 struct ctdb_persistent_write_state
);
163 ctdb_request_control_reply(state
->ctdb_db
->ctdb
, state
->c
, NULL
, -1, "timeout in ctdb_persistent_lock");
167 struct childwrite_handle
{
168 struct ctdb_context
*ctdb
;
169 struct ctdb_db_context
*ctdb_db
;
170 struct tevent_fd
*fde
;
174 void (*callback
)(int, void *);
175 struct timeval start_time
;
178 static int childwrite_destructor(struct childwrite_handle
*h
)
180 CTDB_DECREMENT_STAT(h
->ctdb
, pending_childwrite_calls
);
181 ctdb_kill(h
->ctdb
, h
->child
, SIGKILL
);
185 /* called when the child process has finished writing the record to the
188 static void childwrite_handler(struct tevent_context
*ev
,
189 struct tevent_fd
*fde
,
190 uint16_t flags
, void *private_data
)
192 struct childwrite_handle
*h
= talloc_get_type(private_data
,
193 struct childwrite_handle
);
194 void *p
= h
->private_data
;
195 void (*callback
)(int, void *) = h
->callback
;
196 pid_t child
= h
->child
;
197 TALLOC_CTX
*tmp_ctx
= talloc_new(ev
);
201 CTDB_UPDATE_LATENCY(h
->ctdb
, h
->ctdb_db
, "persistent", childwrite_latency
, h
->start_time
);
202 CTDB_DECREMENT_STAT(h
->ctdb
, pending_childwrite_calls
);
204 /* the handle needs to go away when the context is gone - when
205 the handle goes away this implicitly closes the pipe, which
207 talloc_steal(tmp_ctx
, h
);
209 talloc_set_destructor(h
, NULL
);
211 ret
= sys_read(h
->fd
[0], &c
, 1);
213 DEBUG(DEBUG_ERR
, (__location__
" Read returned %d. Childwrite failed\n", ret
));
219 ctdb_kill(h
->ctdb
, child
, SIGKILL
);
220 talloc_free(tmp_ctx
);
223 /* this creates a child process which will take out a tdb transaction
224 and write the record to the database.
226 static struct childwrite_handle
*ctdb_childwrite(
227 struct ctdb_db_context
*ctdb_db
,
228 void (*callback
)(int, void *private_data
),
229 struct ctdb_persistent_write_state
*state
)
231 struct childwrite_handle
*result
;
233 pid_t parent
= getpid();
235 CTDB_INCREMENT_STAT(ctdb_db
->ctdb
, childwrite_calls
);
236 CTDB_INCREMENT_STAT(ctdb_db
->ctdb
, pending_childwrite_calls
);
238 if (!(result
= talloc_zero(state
, struct childwrite_handle
))) {
239 CTDB_DECREMENT_STAT(ctdb_db
->ctdb
, pending_childwrite_calls
);
243 ret
= pipe(result
->fd
);
247 CTDB_DECREMENT_STAT(ctdb_db
->ctdb
, pending_childwrite_calls
);
251 result
->child
= ctdb_fork(ctdb_db
->ctdb
);
253 if (result
->child
== (pid_t
)-1) {
254 close(result
->fd
[0]);
255 close(result
->fd
[1]);
257 CTDB_DECREMENT_STAT(ctdb_db
->ctdb
, pending_childwrite_calls
);
261 result
->callback
= callback
;
262 result
->private_data
= state
;
263 result
->ctdb
= ctdb_db
->ctdb
;
264 result
->ctdb_db
= ctdb_db
;
266 if (result
->child
== 0) {
269 close(result
->fd
[0]);
270 prctl_set_comment("ctdb_write_persistent");
271 ret
= ctdb_persistent_store(state
);
273 DEBUG(DEBUG_ERR
, (__location__
" Failed to write persistent data\n"));
277 sys_write(result
->fd
[1], &c
, 1);
279 ctdb_wait_for_process_to_exit(parent
);
283 close(result
->fd
[1]);
284 set_close_on_exec(result
->fd
[0]);
286 talloc_set_destructor(result
, childwrite_destructor
);
288 DEBUG(DEBUG_DEBUG
, (__location__
" Created PIPE FD:%d for ctdb_childwrite\n", result
->fd
[0]));
290 result
->fde
= tevent_add_fd(ctdb_db
->ctdb
->ev
, result
, result
->fd
[0],
291 TEVENT_FD_READ
, childwrite_handler
,
293 if (result
->fde
== NULL
) {
295 CTDB_DECREMENT_STAT(ctdb_db
->ctdb
, pending_childwrite_calls
);
298 tevent_fd_set_auto_close(result
->fde
);
300 result
->start_time
= timeval_current();
306 update a record on this node if the new record has a higher rsn than the
309 int32_t ctdb_control_update_record(struct ctdb_context
*ctdb
,
310 struct ctdb_req_control_old
*c
, TDB_DATA recdata
,
313 struct ctdb_db_context
*ctdb_db
;
314 struct ctdb_persistent_write_state
*state
;
315 struct childwrite_handle
*handle
;
316 struct ctdb_marshall_buffer
*m
= (struct ctdb_marshall_buffer
*)recdata
.dptr
;
318 if (ctdb
->recovery_mode
!= CTDB_RECOVERY_NORMAL
) {
319 DEBUG(DEBUG_INFO
,("rejecting ctdb_control_update_record when recovery active\n"));
323 ctdb_db
= find_ctdb_db(ctdb
, m
->db_id
);
324 if (ctdb_db
== NULL
) {
325 DEBUG(DEBUG_ERR
,("Unknown database 0x%08x in ctdb_control_update_record\n", m
->db_id
));
329 if (ctdb_db
->unhealthy_reason
) {
330 DEBUG(DEBUG_ERR
,("db(%s) unhealty in ctdb_control_update_record: %s\n",
331 ctdb_db
->db_name
, ctdb_db
->unhealthy_reason
));
335 state
= talloc(ctdb
, struct ctdb_persistent_write_state
);
336 CTDB_NO_MEMORY(ctdb
, state
);
338 state
->ctdb_db
= ctdb_db
;
342 if (ctdb_db_volatile(ctdb_db
)) {
343 state
->flags
= UPDATE_FLAGS_REPLACE_ONLY
;
346 /* create a child process to take out a transaction and
349 handle
= ctdb_childwrite(ctdb_db
, ctdb_persistent_write_callback
, state
);
350 if (handle
== NULL
) {
351 DEBUG(DEBUG_ERR
,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
356 /* we need to wait for the replies */
359 /* need to keep the control structure around */
360 talloc_steal(state
, c
);
362 /* but we won't wait forever */
363 tevent_add_timer(ctdb
->ev
, state
,
364 timeval_current_ofs(ctdb
->tunable
.control_timeout
, 0),
365 ctdb_persistent_lock_timeout
, state
);