libcli: Avoid an explicit memset call
[Samba.git] / ctdb / server / ctdb_update_record.c
blob7bfa08ac8b4a572a3a48870dcfc215e2b59c4d96
/*
   implementation of the update record control

   Copyright (C) Andrew Tridgell  2007
   Copyright (C) Ronnie Sahlberg  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
21 #include "includes.h"
22 #include "db_wrap.h"
23 #include "tdb.h"
24 #include "ctdb_private.h"
/*
  state carried through one asynchronous update-record request
 */
struct ctdb_persistent_write_state {
	/* database whose local tdb the records are written to */
	struct ctdb_db_context *ctdb_db;

	/* marshalled records that the child process stores */
	struct ctdb_marshall_buffer *m;

	/* control request that is answered once the write has finished */
	struct ctdb_req_control *c;

	/* UPDATE_FLAGS_* bits */
	uint32_t flags;
};
/* only replace records that already exist locally; never create new ones */
#define UPDATE_FLAGS_REPLACE_ONLY 1
37 called from a child process to write the data
39 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
41 int ret, i;
42 struct ctdb_rec_data *rec = NULL;
43 struct ctdb_marshall_buffer *m = state->m;
45 ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
46 if (ret == -1) {
47 DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
48 state->ctdb_db->db_id));
49 return -1;
52 for (i=0;i<m->count;i++) {
53 struct ctdb_ltdb_header oldheader;
54 struct ctdb_ltdb_header header;
55 TDB_DATA key, data, olddata;
56 TALLOC_CTX *tmp_ctx = talloc_new(state);
58 rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
60 if (rec == NULL) {
61 DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
62 i, state->ctdb_db->db_id));
63 talloc_free(tmp_ctx);
64 goto failed;
67 /* we must check if the record exists or not because
68 ctdb_ltdb_fetch will unconditionally create a record
70 if (state->flags & UPDATE_FLAGS_REPLACE_ONLY) {
71 TDB_DATA trec;
72 trec = tdb_fetch(state->ctdb_db->ltdb->tdb, key);
73 if (trec.dsize == 0) {
74 talloc_free(tmp_ctx);
75 continue;
77 free(trec.dptr);
80 /* fetch the old header and ensure the rsn is less than the new rsn */
81 ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
82 if (ret != 0) {
83 DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
84 state->ctdb_db->db_id));
85 talloc_free(tmp_ctx);
86 goto failed;
89 if (oldheader.rsn >= header.rsn &&
90 (olddata.dsize != data.dsize ||
91 memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
92 DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
93 state->ctdb_db->db_id,
94 (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
95 talloc_free(tmp_ctx);
96 goto failed;
99 talloc_free(tmp_ctx);
101 ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
102 if (ret != 0) {
103 DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
104 state->ctdb_db->db_id));
105 goto failed;
109 ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
110 if (ret == -1) {
111 DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
112 state->ctdb_db->db_id));
113 return -1;
116 return 0;
118 failed:
119 tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
120 return -1;
125 called when we the child has completed the persistent write
126 on our behalf
128 static void ctdb_persistent_write_callback(int status, void *private_data)
130 struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
131 struct ctdb_persistent_write_state);
134 ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
136 talloc_free(state);
140 called if our lockwait child times out
142 static void ctdb_persistent_lock_timeout(struct event_context *ev, struct timed_event *te,
143 struct timeval t, void *private_data)
145 struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
146 struct ctdb_persistent_write_state);
147 ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
148 talloc_free(state);
/*
  bookkeeping for one child process that writes records on our behalf
 */
struct childwrite_handle {
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;

	/* fd event watching the read end of the pipe */
	struct fd_event *fde;

	/* pipe between parent (reads fd[0]) and child (writes fd[1]) */
	int fd[2];

	/* pid of the writing child */
	pid_t child;

	/* completion callback and the argument passed to it */
	void *private_data;
	void (*callback)(int, void *);

	/* when the child was started; used for the latency statistics */
	struct timeval start_time;
};
162 static int childwrite_destructor(struct childwrite_handle *h)
164 CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
165 ctdb_kill(h->ctdb, h->child, SIGKILL);
166 return 0;
169 /* called when the child process has finished writing the record to the
170 database
172 static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
173 uint16_t flags, void *private_data)
175 struct childwrite_handle *h = talloc_get_type(private_data,
176 struct childwrite_handle);
177 void *p = h->private_data;
178 void (*callback)(int, void *) = h->callback;
179 pid_t child = h->child;
180 TALLOC_CTX *tmp_ctx = talloc_new(ev);
181 int ret;
182 char c;
184 CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
185 CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
187 /* the handle needs to go away when the context is gone - when
188 the handle goes away this implicitly closes the pipe, which
189 kills the child */
190 talloc_steal(tmp_ctx, h);
192 talloc_set_destructor(h, NULL);
194 ret = read(h->fd[0], &c, 1);
195 if (ret < 1) {
196 DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
197 c = 1;
200 callback(c, p);
202 ctdb_kill(h->ctdb, child, SIGKILL);
203 talloc_free(tmp_ctx);
206 /* this creates a child process which will take out a tdb transaction
207 and write the record to the database.
209 static struct childwrite_handle *ctdb_childwrite(
210 struct ctdb_db_context *ctdb_db,
211 void (*callback)(int, void *private_data),
212 struct ctdb_persistent_write_state *state)
214 struct childwrite_handle *result;
215 int ret;
216 pid_t parent = getpid();
218 CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
219 CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
221 if (!(result = talloc_zero(state, struct childwrite_handle))) {
222 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
223 return NULL;
226 ret = pipe(result->fd);
228 if (ret != 0) {
229 talloc_free(result);
230 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
231 return NULL;
234 result->child = ctdb_fork(ctdb_db->ctdb);
236 if (result->child == (pid_t)-1) {
237 close(result->fd[0]);
238 close(result->fd[1]);
239 talloc_free(result);
240 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
241 return NULL;
244 result->callback = callback;
245 result->private_data = state;
246 result->ctdb = ctdb_db->ctdb;
247 result->ctdb_db = ctdb_db;
249 if (result->child == 0) {
250 char c = 0;
252 close(result->fd[0]);
253 ctdb_set_process_name("ctdb_write_persistent");
254 debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
255 ret = ctdb_persistent_store(state);
256 if (ret != 0) {
257 DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
258 c = 1;
261 write(result->fd[1], &c, 1);
263 /* make sure we die when our parent dies */
264 while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
265 sleep(5);
267 _exit(0);
270 close(result->fd[1]);
271 set_close_on_exec(result->fd[0]);
273 talloc_set_destructor(result, childwrite_destructor);
275 DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
277 result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
278 EVENT_FD_READ, childwrite_handler,
279 (void *)result);
280 if (result->fde == NULL) {
281 talloc_free(result);
282 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
283 return NULL;
285 tevent_fd_set_auto_close(result->fde);
287 result->start_time = timeval_current();
289 return result;
293 update a record on this node if the new record has a higher rsn than the
294 current record
296 int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
297 struct ctdb_req_control *c, TDB_DATA recdata,
298 bool *async_reply)
300 struct ctdb_db_context *ctdb_db;
301 struct ctdb_persistent_write_state *state;
302 struct childwrite_handle *handle;
303 struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
305 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
306 DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
307 return -1;
310 ctdb_db = find_ctdb_db(ctdb, m->db_id);
311 if (ctdb_db == NULL) {
312 DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
313 return -1;
316 if (ctdb_db->unhealthy_reason) {
317 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
318 ctdb_db->db_name, ctdb_db->unhealthy_reason));
319 return -1;
322 state = talloc(ctdb, struct ctdb_persistent_write_state);
323 CTDB_NO_MEMORY(ctdb, state);
325 state->ctdb_db = ctdb_db;
326 state->c = c;
327 state->m = m;
328 state->flags = 0;
329 if (!ctdb_db->persistent) {
330 state->flags = UPDATE_FLAGS_REPLACE_ONLY;
333 /* create a child process to take out a transaction and
334 write the data.
336 handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
337 if (handle == NULL) {
338 DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
339 talloc_free(state);
340 return -1;
343 /* we need to wait for the replies */
344 *async_reply = true;
346 /* need to keep the control structure around */
347 talloc_steal(state, c);
349 /* but we won't wait forever */
350 event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.control_timeout, 0),
351 ctdb_persistent_lock_timeout, state);
353 return 0;