ctdb: Use prctl_set_comment from lib/util
[Samba.git] / ctdb / server / ctdb_update_record.c
blobbc9c6fe06c37adc67605110f25a9fb7956f63f7d
1 /*
2 implementation of the update record control
4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/time.h"
25 #include <talloc.h>
26 #include <tevent.h>
28 #include "lib/tdb_wrap/tdb_wrap.h"
29 #include "lib/util/debug.h"
30 #include "lib/util/samba_util.h"
31 #include "lib/util/util_process.h"
33 #include "ctdb_private.h"
34 #include "ctdb_client.h"
36 #include "common/system.h"
37 #include "common/common.h"
38 #include "common/logging.h"
/*
  state carried by one update-record request while a child process
  performs the write
 */
struct ctdb_persistent_write_state {
	/* database the marshalled records are stored into */
	struct ctdb_db_context *ctdb_db;
	/* marshalled records taken from the control payload */
	struct ctdb_marshall_buffer *m;
	/* control request that is answered when the write finishes or times out */
	struct ctdb_req_control_old *c;
	/* UPDATE_FLAGS_* bits */
	uint32_t flags;
};
/* only update records that already exist locally; never create new ones */
#define UPDATE_FLAGS_REPLACE_ONLY 1
51 called from a child process to write the data
53 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
55 int ret, i;
56 struct ctdb_rec_data_old *rec = NULL;
57 struct ctdb_marshall_buffer *m = state->m;
59 ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
60 if (ret == -1) {
61 DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
62 state->ctdb_db->db_id));
63 return -1;
66 for (i=0;i<m->count;i++) {
67 struct ctdb_ltdb_header oldheader;
68 struct ctdb_ltdb_header header;
69 TDB_DATA key, data, olddata;
70 TALLOC_CTX *tmp_ctx = talloc_new(state);
72 rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
74 if (rec == NULL) {
75 DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
76 i, state->ctdb_db->db_id));
77 talloc_free(tmp_ctx);
78 goto failed;
81 /* we must check if the record exists or not because
82 ctdb_ltdb_fetch will unconditionally create a record
84 if (state->flags & UPDATE_FLAGS_REPLACE_ONLY) {
85 TDB_DATA trec;
86 trec = tdb_fetch(state->ctdb_db->ltdb->tdb, key);
87 if (trec.dsize == 0) {
88 talloc_free(tmp_ctx);
89 continue;
91 free(trec.dptr);
94 /* fetch the old header and ensure the rsn is less than the new rsn */
95 ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
96 if (ret != 0) {
97 DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
98 state->ctdb_db->db_id));
99 talloc_free(tmp_ctx);
100 goto failed;
103 if (oldheader.rsn >= header.rsn &&
104 (olddata.dsize != data.dsize ||
105 memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
106 DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
107 state->ctdb_db->db_id,
108 (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
109 talloc_free(tmp_ctx);
110 goto failed;
113 talloc_free(tmp_ctx);
115 ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
116 if (ret != 0) {
117 DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
118 state->ctdb_db->db_id));
119 goto failed;
123 ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
124 if (ret == -1) {
125 DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
126 state->ctdb_db->db_id));
127 return -1;
130 return 0;
132 failed:
133 tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
134 return -1;
139 called when we the child has completed the persistent write
140 on our behalf
142 static void ctdb_persistent_write_callback(int status, void *private_data)
144 struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
145 struct ctdb_persistent_write_state);
148 ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
150 talloc_free(state);
154 called if our lockwait child times out
156 static void ctdb_persistent_lock_timeout(struct tevent_context *ev,
157 struct tevent_timer *te,
158 struct timeval t, void *private_data)
160 struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
161 struct ctdb_persistent_write_state);
162 ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
163 talloc_free(state);
/*
  bookkeeping for one forked write child
 */
struct childwrite_handle {
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;
	/* read event registered on fd[0] */
	struct tevent_fd *fde;
	/* pipe: the child writes its one-byte status to fd[1],
	   the parent reads it from fd[0] */
	int fd[2];
	/* pid returned by ctdb_fork() */
	pid_t child;
	/* opaque argument handed back to callback */
	void *private_data;
	/* invoked with the status byte once the child has reported */
	void (*callback)(int, void *);
	/* taken when the handle is set up; used for the latency statistic */
	struct timeval start_time;
};
177 static int childwrite_destructor(struct childwrite_handle *h)
179 CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
180 ctdb_kill(h->ctdb, h->child, SIGKILL);
181 return 0;
184 /* called when the child process has finished writing the record to the
185 database
187 static void childwrite_handler(struct tevent_context *ev,
188 struct tevent_fd *fde,
189 uint16_t flags, void *private_data)
191 struct childwrite_handle *h = talloc_get_type(private_data,
192 struct childwrite_handle);
193 void *p = h->private_data;
194 void (*callback)(int, void *) = h->callback;
195 pid_t child = h->child;
196 TALLOC_CTX *tmp_ctx = talloc_new(ev);
197 int ret;
198 char c;
200 CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
201 CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
203 /* the handle needs to go away when the context is gone - when
204 the handle goes away this implicitly closes the pipe, which
205 kills the child */
206 talloc_steal(tmp_ctx, h);
208 talloc_set_destructor(h, NULL);
210 ret = sys_read(h->fd[0], &c, 1);
211 if (ret < 1) {
212 DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
213 c = 1;
216 callback(c, p);
218 ctdb_kill(h->ctdb, child, SIGKILL);
219 talloc_free(tmp_ctx);
222 /* this creates a child process which will take out a tdb transaction
223 and write the record to the database.
225 static struct childwrite_handle *ctdb_childwrite(
226 struct ctdb_db_context *ctdb_db,
227 void (*callback)(int, void *private_data),
228 struct ctdb_persistent_write_state *state)
230 struct childwrite_handle *result;
231 int ret;
232 pid_t parent = getpid();
234 CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
235 CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
237 if (!(result = talloc_zero(state, struct childwrite_handle))) {
238 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
239 return NULL;
242 ret = pipe(result->fd);
244 if (ret != 0) {
245 talloc_free(result);
246 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
247 return NULL;
250 result->child = ctdb_fork(ctdb_db->ctdb);
252 if (result->child == (pid_t)-1) {
253 close(result->fd[0]);
254 close(result->fd[1]);
255 talloc_free(result);
256 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
257 return NULL;
260 result->callback = callback;
261 result->private_data = state;
262 result->ctdb = ctdb_db->ctdb;
263 result->ctdb_db = ctdb_db;
265 if (result->child == 0) {
266 char c = 0;
268 close(result->fd[0]);
269 prctl_set_comment("ctdb_write_persistent");
270 debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
271 ret = ctdb_persistent_store(state);
272 if (ret != 0) {
273 DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
274 c = 1;
277 sys_write(result->fd[1], &c, 1);
279 /* make sure we die when our parent dies */
280 while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
281 sleep(5);
283 _exit(0);
286 close(result->fd[1]);
287 set_close_on_exec(result->fd[0]);
289 talloc_set_destructor(result, childwrite_destructor);
291 DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
293 result->fde = tevent_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
294 TEVENT_FD_READ, childwrite_handler,
295 (void *)result);
296 if (result->fde == NULL) {
297 talloc_free(result);
298 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
299 return NULL;
301 tevent_fd_set_auto_close(result->fde);
303 result->start_time = timeval_current();
305 return result;
309 update a record on this node if the new record has a higher rsn than the
310 current record
312 int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
313 struct ctdb_req_control_old *c, TDB_DATA recdata,
314 bool *async_reply)
316 struct ctdb_db_context *ctdb_db;
317 struct ctdb_persistent_write_state *state;
318 struct childwrite_handle *handle;
319 struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
321 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
322 DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
323 return -1;
326 ctdb_db = find_ctdb_db(ctdb, m->db_id);
327 if (ctdb_db == NULL) {
328 DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
329 return -1;
332 if (ctdb_db->unhealthy_reason) {
333 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
334 ctdb_db->db_name, ctdb_db->unhealthy_reason));
335 return -1;
338 state = talloc(ctdb, struct ctdb_persistent_write_state);
339 CTDB_NO_MEMORY(ctdb, state);
341 state->ctdb_db = ctdb_db;
342 state->c = c;
343 state->m = m;
344 state->flags = 0;
345 if (!ctdb_db->persistent) {
346 state->flags = UPDATE_FLAGS_REPLACE_ONLY;
349 /* create a child process to take out a transaction and
350 write the data.
352 handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
353 if (handle == NULL) {
354 DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
355 talloc_free(state);
356 return -1;
359 /* we need to wait for the replies */
360 *async_reply = true;
362 /* need to keep the control structure around */
363 talloc_steal(state, c);
365 /* but we won't wait forever */
366 tevent_add_timer(ctdb->ev, state,
367 timeval_current_ofs(ctdb->tunable.control_timeout, 0),
368 ctdb_persistent_lock_timeout, state);
370 return 0;