4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
27 #include "lib/util/debug.h"
28 #include "lib/util/samba_util.h"
30 #include "ctdb_private.h"
32 #include "common/reqid.h"
33 #include "common/system.h"
34 #include "common/common.h"
35 #include "common/logging.h"
38 return error string for last error
40 const char *ctdb_errstr(struct ctdb_context
*ctdb
)
47 remember an error message
49 void ctdb_set_error(struct ctdb_context
*ctdb
, const char *fmt
, ...)
52 talloc_free(ctdb
->err_msg
);
54 ctdb
->err_msg
= talloc_vasprintf(ctdb
, fmt
, ap
);
55 DEBUG(DEBUG_ERR
,("ctdb error: %s\n", ctdb
->err_msg
));
60 a fatal internal error occurred - no hope for recovery
62 void ctdb_fatal(struct ctdb_context
*ctdb
, const char *msg
)
64 DEBUG(DEBUG_ALERT
,("ctdb fatal error: %s\n", msg
));
69 like ctdb_fatal() but a core/backtrace would not be useful
71 void ctdb_die(struct ctdb_context
*ctdb
, const char *msg
)
73 DEBUG(DEBUG_ALERT
,("ctdb exiting with error: %s\n", msg
));
77 /* Set the path of a helper program from envvar, falling back to
78 * dir/file if envvar unset. type is a string to print in log
79 * messages. helper is assumed to point to a statically allocated
80 * array of size bytes, initialised to "". If file is NULL don't fall
81 * back if envvar is unset. If dir is NULL and envvar is unset (but
82 * file is not NULL) then this is an error. Returns true if helper is
83 * set, either previously or this time. */
84 bool ctdb_set_helper(const char *type
, char *helper
, size_t size
,
86 const char *dir
, const char *file
)
91 if (helper
[0] != '\0') {
98 if (strlen(t
) >= size
) {
100 ("Unable to set %s - path too long\n", type
));
104 strncpy(helper
, t
, size
);
105 } else if (file
== NULL
) {
107 } else if (dir
== NULL
) {
109 ("Unable to set %s - dir is NULL\n", type
));
112 if (snprintf(helper
, size
, "%s/%s", dir
, file
) >= size
) {
114 ("Unable to set %s - path too long\n", type
));
119 if (stat(helper
, &st
) != 0) {
121 ("Unable to set %s \"%s\" - %s\n",
122 type
, helper
, strerror(errno
)));
125 if (!(st
.st_mode
& S_IXUSR
)) {
127 ("Unable to set %s \"%s\" - not executable\n",
133 ("Set %s to \"%s\"\n", type
, helper
));
137 /* Invoke an external program to do some sort of tracing on the CTDB
138 * process. This might block for a little while. The external
139 * program is specified by the environment variable
140 * CTDB_EXTERNAL_TRACE. This program should take one argument: the
141 * pid of the process to trace. Commonly, the program would be a
142 * wrapper script around gcore.
144 void ctdb_external_trace(void)
147 static char external_trace
[PATH_MAX
+1] = "";
150 if (!ctdb_set_helper("external trace handler",
151 external_trace
, sizeof(external_trace
),
152 "CTDB_EXTERNAL_TRACE", NULL
, NULL
)) {
156 cmd
= talloc_asprintf(NULL
, "%s %lu", external_trace
, (unsigned long) getpid());
157 DEBUG(DEBUG_WARNING
,("begin external trace: %s\n", cmd
));
161 ("external trace command \"%s\" failed\n", cmd
));
163 DEBUG(DEBUG_WARNING
,("end external trace: %s\n", cmd
));
170 int ctdb_parse_address(TALLOC_CTX
*mem_ctx
, const char *str
,
171 ctdb_sock_addr
*address
)
177 se
= getservbyname("ctdb", "tcp");
183 port
= ntohs(se
->s_port
);
186 if (! parse_ip(str
, NULL
, port
, address
)) {
195 check if two addresses are the same
197 bool ctdb_same_address(ctdb_sock_addr
*a1
, ctdb_sock_addr
*a2
)
199 return ctdb_same_ip(a1
, a2
) &&
200 ctdb_addr_to_port(a1
) == ctdb_addr_to_port(a2
);
205 hash function for mapping data to a VNN - taken from tdb
207 uint32_t ctdb_hash(const TDB_DATA
*key
)
209 return tdb_jenkins_hash(discard_const(key
));
213 static uint32_t ctdb_marshall_record_size(TDB_DATA key
,
214 struct ctdb_ltdb_header
*header
,
217 return offsetof(struct ctdb_rec_data_old
, data
) + key
.dsize
+
218 data
.dsize
+ (header
? sizeof(*header
) : 0);
221 static void ctdb_marshall_record_copy(struct ctdb_rec_data_old
*rec
,
224 struct ctdb_ltdb_header
*header
,
230 rec
->length
= length
;
232 rec
->keylen
= key
.dsize
;
233 memcpy(&rec
->data
[0], key
.dptr
, key
.dsize
);
237 rec
->datalen
= data
.dsize
+ sizeof(*header
);
238 memcpy(&rec
->data
[offset
], header
, sizeof(*header
));
239 offset
+= sizeof(*header
);
241 rec
->datalen
= data
.dsize
;
243 memcpy(&rec
->data
[offset
], data
.dptr
, data
.dsize
);
247 form a ctdb_rec_data record from a key/data pair
249 note that header may be NULL. If not NULL then it is included in the data portion
252 struct ctdb_rec_data_old
*ctdb_marshall_record(TALLOC_CTX
*mem_ctx
,
255 struct ctdb_ltdb_header
*header
,
259 struct ctdb_rec_data_old
*d
;
261 length
= ctdb_marshall_record_size(key
, header
, data
);
263 d
= (struct ctdb_rec_data_old
*)talloc_size(mem_ctx
, length
);
268 ctdb_marshall_record_copy(d
, reqid
, key
, header
, data
, length
);
273 /* helper function for marshalling multiple records */
274 struct ctdb_marshall_buffer
*ctdb_marshall_add(TALLOC_CTX
*mem_ctx
,
275 struct ctdb_marshall_buffer
*m
,
279 struct ctdb_ltdb_header
*header
,
282 struct ctdb_rec_data_old
*r
;
283 struct ctdb_marshall_buffer
*m2
;
284 uint32_t length
, offset
;
286 length
= ctdb_marshall_record_size(key
, header
, data
);
289 offset
= offsetof(struct ctdb_marshall_buffer
, data
);
290 m2
= talloc_zero_size(mem_ctx
, offset
+ length
);
292 offset
= talloc_get_size(m
);
293 m2
= talloc_realloc_size(mem_ctx
, m
, offset
+ length
);
304 r
= (struct ctdb_rec_data_old
*)((uint8_t *)m2
+ offset
);
305 ctdb_marshall_record_copy(r
, reqid
, key
, header
, data
, length
);
311 /* we've finished marshalling, return a data blob with the marshalled records */
312 TDB_DATA
ctdb_marshall_finish(struct ctdb_marshall_buffer
*m
)
315 data
.dptr
= (uint8_t *)m
;
316 data
.dsize
= talloc_get_size(m
);
321 loop over a marshalling buffer
323 - pass r==NULL to start
324 - loop the number of times indicated by m->count
326 struct ctdb_rec_data_old
*ctdb_marshall_loop_next(
327 struct ctdb_marshall_buffer
*m
,
328 struct ctdb_rec_data_old
*r
,
330 struct ctdb_ltdb_header
*header
,
331 TDB_DATA
*key
, TDB_DATA
*data
)
334 r
= (struct ctdb_rec_data_old
*)&m
->data
[0];
336 r
= (struct ctdb_rec_data_old
*)(r
->length
+ (uint8_t *)r
);
344 key
->dptr
= &r
->data
[0];
345 key
->dsize
= r
->keylen
;
348 data
->dptr
= &r
->data
[r
->keylen
];
349 data
->dsize
= r
->datalen
;
350 if (header
!= NULL
) {
351 data
->dptr
+= sizeof(*header
);
352 data
->dsize
-= sizeof(*header
);
356 if (header
!= NULL
) {
357 if (r
->datalen
< sizeof(*header
)) {
360 memcpy(header
, &r
->data
[r
->keylen
], sizeof(*header
));
367 This is used to canonicalize a ctdb_sock_addr structure.
369 void ctdb_canonicalize_ip(const ctdb_sock_addr
*ip
, ctdb_sock_addr
*cip
)
371 char prefix
[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
373 memcpy(cip
, ip
, sizeof (*cip
));
375 if ( (ip
->sa
.sa_family
== AF_INET6
)
376 && !memcmp(&ip
->ip6
.sin6_addr
, prefix
, 12)) {
377 memset(cip
, 0, sizeof(*cip
));
378 #ifdef HAVE_SOCK_SIN_LEN
379 cip
->ip
.sin_len
= sizeof(*cip
);
381 cip
->ip
.sin_family
= AF_INET
;
382 cip
->ip
.sin_port
= ip
->ip6
.sin6_port
;
383 memcpy(&cip
->ip
.sin_addr
, &ip
->ip6
.sin6_addr
.s6_addr
[12], 4);
387 bool ctdb_same_ip(const ctdb_sock_addr
*tip1
, const ctdb_sock_addr
*tip2
)
389 ctdb_sock_addr ip1
, ip2
;
391 ctdb_canonicalize_ip(tip1
, &ip1
);
392 ctdb_canonicalize_ip(tip2
, &ip2
);
394 if (ip1
.sa
.sa_family
!= ip2
.sa
.sa_family
) {
398 switch (ip1
.sa
.sa_family
) {
400 return ip1
.ip
.sin_addr
.s_addr
== ip2
.ip
.sin_addr
.s_addr
;
402 return !memcmp(&ip1
.ip6
.sin6_addr
.s6_addr
[0],
403 &ip2
.ip6
.sin6_addr
.s6_addr
[0],
406 DEBUG(DEBUG_ERR
, (__location__
" CRITICAL Can not compare sockaddr structures of type %u\n", ip1
.sa
.sa_family
));
414 compare two ctdb_sock_addr structures
416 bool ctdb_same_sockaddr(const ctdb_sock_addr
*ip1
, const ctdb_sock_addr
*ip2
)
418 return ctdb_same_ip(ip1
, ip2
) && ip1
->ip
.sin_port
== ip2
->ip
.sin_port
;
421 char *ctdb_addr_to_str(ctdb_sock_addr
*addr
)
423 static char cip
[128] = "";
425 switch (addr
->sa
.sa_family
) {
427 inet_ntop(addr
->ip
.sin_family
, &addr
->ip
.sin_addr
, cip
, sizeof(cip
));
430 inet_ntop(addr
->ip6
.sin6_family
, &addr
->ip6
.sin6_addr
, cip
, sizeof(cip
));
433 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
434 ctdb_external_trace();
440 unsigned ctdb_addr_to_port(ctdb_sock_addr
*addr
)
442 switch (addr
->sa
.sa_family
) {
444 return ntohs(addr
->ip
.sin_port
);
447 return ntohs(addr
->ip6
.sin6_port
);
450 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
456 /* Add a node to a node map with given address and flags */
457 static bool node_map_add(TALLOC_CTX
*mem_ctx
,
458 const char *nstr
, uint32_t flags
,
459 struct ctdb_node_map_old
**node_map
)
464 struct ctdb_node_and_flags
*n
;
466 /* Might as well do this before trying to allocate memory */
467 if (ctdb_parse_address(mem_ctx
, nstr
, &addr
) == -1) {
471 num
= (*node_map
)->num
+ 1;
472 s
= offsetof(struct ctdb_node_map_old
, nodes
) +
473 num
* sizeof(struct ctdb_node_and_flags
);
474 *node_map
= talloc_realloc_size(mem_ctx
, *node_map
, s
);
475 if (*node_map
== NULL
) {
476 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
480 n
= &(*node_map
)->nodes
[(*node_map
)->num
];
482 n
->pnn
= (*node_map
)->num
;
490 /* Read a nodes file into a node map */
491 struct ctdb_node_map_old
*ctdb_read_nodes_file(TALLOC_CTX
*mem_ctx
,
497 struct ctdb_node_map_old
*ret
;
499 /* Allocate node map header */
500 ret
= talloc_zero_size(mem_ctx
, offsetof(struct ctdb_node_map_old
, nodes
));
502 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
506 lines
= file_lines_load(nlist
, &nlines
, 0, mem_ctx
);
508 DEBUG(DEBUG_ERR
, ("Failed to read nodes file \"%s\"\n", nlist
));
511 while (nlines
> 0 && strcmp(lines
[nlines
-1], "") == 0) {
515 for (i
=0; i
< nlines
; i
++) {
521 /* strip leading spaces */
522 while((*node
== ' ') || (*node
== '\t')) {
529 ((node
[len
-1] == ' ') || (node
[len
-1] == '\t')))
539 /* A "deleted" node is a node that is
540 commented out in the nodes file. This is
541 used instead of removing a line, which
542 would cause subsequent nodes to change
544 flags
= NODE_FLAGS_DELETED
;
545 node
= discard_const("0.0.0.0");
549 if (!node_map_add(mem_ctx
, node
, flags
, &ret
)) {
560 struct ctdb_node_map_old
*
561 ctdb_node_list_to_map(struct ctdb_node
**nodes
, uint32_t num_nodes
,
566 struct ctdb_node_map_old
*node_map
;
568 size
= offsetof(struct ctdb_node_map_old
, nodes
) +
569 num_nodes
* sizeof(struct ctdb_node_and_flags
);
570 node_map
= (struct ctdb_node_map_old
*)talloc_zero_size(mem_ctx
, size
);
571 if (node_map
== NULL
) {
573 (__location__
" Failed to allocate nodemap array\n"));
577 node_map
->num
= num_nodes
;
578 for (i
=0; i
<num_nodes
; i
++) {
579 node_map
->nodes
[i
].addr
= nodes
[i
]->address
;
580 node_map
->nodes
[i
].pnn
= nodes
[i
]->pnn
;
581 node_map
->nodes
[i
].flags
= nodes
[i
]->flags
;
587 const char *ctdb_eventscript_call_names
[] = {
604 /* Runstate handling */
606 enum ctdb_runstate runstate
;
609 { CTDB_RUNSTATE_UNKNOWN
, "UNKNOWN" },
610 { CTDB_RUNSTATE_INIT
, "INIT" },
611 { CTDB_RUNSTATE_SETUP
, "SETUP" },
612 { CTDB_RUNSTATE_FIRST_RECOVERY
, "FIRST_RECOVERY" },
613 { CTDB_RUNSTATE_STARTUP
, "STARTUP" },
614 { CTDB_RUNSTATE_RUNNING
, "RUNNING" },
615 { CTDB_RUNSTATE_SHUTDOWN
, "SHUTDOWN" },
619 const char *runstate_to_string(enum ctdb_runstate runstate
)
622 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
623 if (runstate_map
[i
].runstate
== runstate
) {
624 return runstate_map
[i
].label
;
628 return runstate_map
[0].label
;
631 enum ctdb_runstate
runstate_from_string(const char *label
)
634 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
635 if (strcasecmp(runstate_map
[i
].label
, label
) == 0) {
636 return runstate_map
[i
].runstate
;
640 return CTDB_RUNSTATE_UNKNOWN
;
643 void ctdb_set_runstate(struct ctdb_context
*ctdb
, enum ctdb_runstate runstate
)
645 DEBUG(DEBUG_NOTICE
,("Set runstate to %s (%d)\n",
646 runstate_to_string(runstate
), runstate
));
648 if (runstate
<= ctdb
->runstate
) {
649 ctdb_fatal(ctdb
, "runstate must always increase");
652 ctdb
->runstate
= runstate
;
655 /* Convert arbitrary data to 4-byte boundary padded uint32 array */
656 uint32_t *ctdb_key_to_idkey(TALLOC_CTX
*mem_ctx
, TDB_DATA key
)
658 uint32_t idkey_size
, *k
;
660 idkey_size
= 1 + (key
.dsize
+ sizeof(uint32_t)-1) / sizeof(uint32_t);
662 k
= talloc_zero_array(mem_ctx
, uint32_t, idkey_size
);
668 memcpy(&k
[1], key
.dptr
, key
.dsize
);