4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
28 return error string for last error
30 const char *ctdb_errstr(struct ctdb_context
*ctdb
)
37 remember an error message
39 void ctdb_set_error(struct ctdb_context
*ctdb
, const char *fmt
, ...)
42 talloc_free(ctdb
->err_msg
);
44 ctdb
->err_msg
= talloc_vasprintf(ctdb
, fmt
, ap
);
45 DEBUG(DEBUG_ERR
,("ctdb error: %s\n", ctdb
->err_msg
));
50 a fatal internal error occurred - no hope for recovery
52 void ctdb_fatal(struct ctdb_context
*ctdb
, const char *msg
)
54 DEBUG(DEBUG_ALERT
,("ctdb fatal error: %s\n", msg
));
59 like ctdb_fatal() but a core/backtrace would not be useful
61 void ctdb_die(struct ctdb_context
*ctdb
, const char *msg
)
63 DEBUG(DEBUG_ALERT
,("ctdb exiting with error: %s\n", msg
));
67 /* Set the path of a helper program from envvar, falling back to
68 * dir/file if envvar unset. type is a string to print in log
69 * messages. helper is assumed to point to a statically allocated
70 * array of size bytes, initialised to "". If file is NULL don't fall
71 * back if envvar is unset. If dir is NULL and envvar is unset (but
72 * file is not NULL) then this is an error. Returns true if helper is
73 * set, either previously or this time. */
74 bool ctdb_set_helper(const char *type
, char *helper
, size_t size
,
76 const char *dir
, const char *file
)
81 if (helper
[0] != '\0') {
88 if (strlen(t
) >= size
) {
90 ("Unable to set %s - path too long\n", type
));
94 strncpy(helper
, t
, size
);
95 } else if (file
== NULL
) {
97 } else if (dir
== NULL
) {
99 ("Unable to set %s - dir is NULL\n", type
));
102 if (snprintf(helper
, size
, "%s/%s", dir
, file
) >= size
) {
104 ("Unable to set %s - path too long\n", type
));
109 if (stat(helper
, &st
) != 0) {
111 ("Unable to set %s \"%s\" - %s\n",
112 type
, helper
, strerror(errno
)));
115 if (!(st
.st_mode
& S_IXUSR
)) {
117 ("Unable to set %s \"%s\" - not executable\n",
123 ("Set %s to \"%s\"\n", type
, helper
));
127 /* Invoke an external program to do some sort of tracing on the CTDB
128 * process. This might block for a little while. The external
129 * program is specified by the environment variable
130 * CTDB_EXTERNAL_TRACE. This program should take one argument: the
131 * pid of the process to trace. Commonly, the program would be a
132 * wrapper script around gcore.
134 void ctdb_external_trace(void)
137 static char external_trace
[PATH_MAX
+1] = "";
140 if (!ctdb_set_helper("external trace handler",
141 external_trace
, sizeof(external_trace
),
142 "CTDB_EXTERNAL_TRACE", NULL
, NULL
)) {
146 cmd
= talloc_asprintf(NULL
, "%s %lu", external_trace
, (unsigned long) getpid());
147 DEBUG(DEBUG_WARNING
,("begin external trace: %s\n", cmd
));
151 ("external trace command \"%s\" failed\n", cmd
));
153 DEBUG(DEBUG_WARNING
,("end external trace: %s\n", cmd
));
160 int ctdb_parse_address(TALLOC_CTX
*mem_ctx
, const char *str
,
161 ctdb_sock_addr
*address
)
167 se
= getservbyname("ctdb", "tcp");
173 port
= ntohs(se
->s_port
);
176 if (! parse_ip(str
, NULL
, port
, address
)) {
185 check if two addresses are the same
187 bool ctdb_same_address(ctdb_sock_addr
*a1
, ctdb_sock_addr
*a2
)
189 return ctdb_same_ip(a1
, a2
) &&
190 ctdb_addr_to_port(a1
) == ctdb_addr_to_port(a2
);
195 hash function for mapping data to a VNN - taken from tdb
197 uint32_t ctdb_hash(const TDB_DATA
*key
)
199 return tdb_jenkins_hash(discard_const(key
));
203 a type checking variant of idr_find
205 static void *_idr_find_type(struct idr_context
*idp
, int id
, const char *type
, const char *location
)
207 void *p
= idr_find(idp
, id
);
208 if (p
&& talloc_check_name(p
, type
) == NULL
) {
209 DEBUG(DEBUG_ERR
,("%s idr_find_type expected type %s but got %s\n",
210 location
, type
, talloc_get_name(p
)));
216 uint32_t ctdb_reqid_new(struct ctdb_context
*ctdb
, void *state
)
218 int id
= idr_get_new_above(ctdb
->idr
, state
, ctdb
->lastid
+1, INT_MAX
);
220 DEBUG(DEBUG_DEBUG
, ("Reqid wrap!\n"));
221 id
= idr_get_new(ctdb
->idr
, state
, INT_MAX
);
227 void *_ctdb_reqid_find(struct ctdb_context
*ctdb
, uint32_t reqid
, const char *type
, const char *location
)
231 p
= _idr_find_type(ctdb
->idr
, reqid
, type
, location
);
233 DEBUG(DEBUG_WARNING
, ("Could not find idr:%u\n",reqid
));
240 void ctdb_reqid_remove(struct ctdb_context
*ctdb
, uint32_t reqid
)
244 ret
= idr_remove(ctdb
->idr
, reqid
);
246 DEBUG(DEBUG_ERR
, ("Removing idr that does not exist\n"));
251 static uint32_t ctdb_marshall_record_size(TDB_DATA key
,
252 struct ctdb_ltdb_header
*header
,
255 return offsetof(struct ctdb_rec_data
, data
) + key
.dsize
+
256 data
.dsize
+ (header
? sizeof(*header
) : 0);
259 static void ctdb_marshall_record_copy(struct ctdb_rec_data
*rec
,
262 struct ctdb_ltdb_header
*header
,
268 rec
->length
= length
;
270 rec
->keylen
= key
.dsize
;
271 memcpy(&rec
->data
[0], key
.dptr
, key
.dsize
);
275 rec
->datalen
= data
.dsize
+ sizeof(*header
);
276 memcpy(&rec
->data
[offset
], header
, sizeof(*header
));
277 offset
+= sizeof(*header
);
279 rec
->datalen
= data
.dsize
;
281 memcpy(&rec
->data
[offset
], data
.dptr
, data
.dsize
);
285 form a ctdb_rec_data record from a key/data pair
287 note that header may be NULL. If not NULL then it is included in the data portion
290 struct ctdb_rec_data
*ctdb_marshall_record(TALLOC_CTX
*mem_ctx
, uint32_t reqid
,
292 struct ctdb_ltdb_header
*header
,
296 struct ctdb_rec_data
*d
;
298 length
= ctdb_marshall_record_size(key
, header
, data
);
300 d
= (struct ctdb_rec_data
*)talloc_size(mem_ctx
, length
);
305 ctdb_marshall_record_copy(d
, reqid
, key
, header
, data
, length
);
310 /* helper function for marshalling multiple records */
311 struct ctdb_marshall_buffer
*ctdb_marshall_add(TALLOC_CTX
*mem_ctx
,
312 struct ctdb_marshall_buffer
*m
,
316 struct ctdb_ltdb_header
*header
,
319 struct ctdb_rec_data
*r
;
320 struct ctdb_marshall_buffer
*m2
;
321 uint32_t length
, offset
;
323 length
= ctdb_marshall_record_size(key
, header
, data
);
326 offset
= offsetof(struct ctdb_marshall_buffer
, data
);
327 m2
= talloc_zero_size(mem_ctx
, offset
+ length
);
329 offset
= talloc_get_size(m
);
330 m2
= talloc_realloc_size(mem_ctx
, m
, offset
+ length
);
341 r
= (struct ctdb_rec_data
*)((uint8_t *)m2
+ offset
);
342 ctdb_marshall_record_copy(r
, reqid
, key
, header
, data
, length
);
348 /* we've finished marshalling, return a data blob with the marshalled records */
349 TDB_DATA
ctdb_marshall_finish(struct ctdb_marshall_buffer
*m
)
352 data
.dptr
= (uint8_t *)m
;
353 data
.dsize
= talloc_get_size(m
);
358 loop over a marshalling buffer
360 - pass r==NULL to start
361 - loop the number of times indicated by m->count
363 struct ctdb_rec_data
*ctdb_marshall_loop_next(struct ctdb_marshall_buffer
*m
, struct ctdb_rec_data
*r
,
365 struct ctdb_ltdb_header
*header
,
366 TDB_DATA
*key
, TDB_DATA
*data
)
369 r
= (struct ctdb_rec_data
*)&m
->data
[0];
371 r
= (struct ctdb_rec_data
*)(r
->length
+ (uint8_t *)r
);
379 key
->dptr
= &r
->data
[0];
380 key
->dsize
= r
->keylen
;
383 data
->dptr
= &r
->data
[r
->keylen
];
384 data
->dsize
= r
->datalen
;
385 if (header
!= NULL
) {
386 data
->dptr
+= sizeof(*header
);
387 data
->dsize
-= sizeof(*header
);
391 if (header
!= NULL
) {
392 if (r
->datalen
< sizeof(*header
)) {
395 memcpy(header
, &r
->data
[r
->keylen
], sizeof(*header
));
402 This is used to canonicalize a ctdb_sock_addr structure.
404 void ctdb_canonicalize_ip(const ctdb_sock_addr
*ip
, ctdb_sock_addr
*cip
)
406 char prefix
[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
408 memcpy(cip
, ip
, sizeof (*cip
));
410 if ( (ip
->sa
.sa_family
== AF_INET6
)
411 && !memcmp(&ip
->ip6
.sin6_addr
, prefix
, 12)) {
412 memset(cip
, 0, sizeof(*cip
));
413 #ifdef HAVE_SOCK_SIN_LEN
414 cip
->ip
.sin_len
= sizeof(*cip
);
416 cip
->ip
.sin_family
= AF_INET
;
417 cip
->ip
.sin_port
= ip
->ip6
.sin6_port
;
418 memcpy(&cip
->ip
.sin_addr
, &ip
->ip6
.sin6_addr
.s6_addr
[12], 4);
422 bool ctdb_same_ip(const ctdb_sock_addr
*tip1
, const ctdb_sock_addr
*tip2
)
424 ctdb_sock_addr ip1
, ip2
;
426 ctdb_canonicalize_ip(tip1
, &ip1
);
427 ctdb_canonicalize_ip(tip2
, &ip2
);
429 if (ip1
.sa
.sa_family
!= ip2
.sa
.sa_family
) {
433 switch (ip1
.sa
.sa_family
) {
435 return ip1
.ip
.sin_addr
.s_addr
== ip2
.ip
.sin_addr
.s_addr
;
437 return !memcmp(&ip1
.ip6
.sin6_addr
.s6_addr
[0],
438 &ip2
.ip6
.sin6_addr
.s6_addr
[0],
441 DEBUG(DEBUG_ERR
, (__location__
" CRITICAL Can not compare sockaddr structures of type %u\n", ip1
.sa
.sa_family
));
449 compare two ctdb_sock_addr structures
451 bool ctdb_same_sockaddr(const ctdb_sock_addr
*ip1
, const ctdb_sock_addr
*ip2
)
453 return ctdb_same_ip(ip1
, ip2
) && ip1
->ip
.sin_port
== ip2
->ip
.sin_port
;
456 char *ctdb_addr_to_str(ctdb_sock_addr
*addr
)
458 static char cip
[128] = "";
460 switch (addr
->sa
.sa_family
) {
462 inet_ntop(addr
->ip
.sin_family
, &addr
->ip
.sin_addr
, cip
, sizeof(cip
));
465 inet_ntop(addr
->ip6
.sin6_family
, &addr
->ip6
.sin6_addr
, cip
, sizeof(cip
));
468 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
469 ctdb_external_trace();
475 unsigned ctdb_addr_to_port(ctdb_sock_addr
*addr
)
477 switch (addr
->sa
.sa_family
) {
479 return ntohs(addr
->ip
.sin_port
);
482 return ntohs(addr
->ip6
.sin6_port
);
485 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
491 /* Add a node to a node map with given address and flags */
492 static bool node_map_add(TALLOC_CTX
*mem_ctx
,
493 const char *nstr
, uint32_t flags
,
494 struct ctdb_node_map
**node_map
)
499 struct ctdb_node_and_flags
*n
;
501 /* Might as well do this before trying to allocate memory */
502 if (ctdb_parse_address(mem_ctx
, nstr
, &addr
) == -1) {
506 num
= (*node_map
)->num
+ 1;
507 s
= offsetof(struct ctdb_node_map
, nodes
) +
508 num
* sizeof(struct ctdb_node_and_flags
);
509 *node_map
= talloc_realloc_size(mem_ctx
, *node_map
, s
);
510 if (*node_map
== NULL
) {
511 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
515 n
= &(*node_map
)->nodes
[(*node_map
)->num
];
517 n
->pnn
= (*node_map
)->num
;
525 /* Read a nodes file into a node map */
526 struct ctdb_node_map
*ctdb_read_nodes_file(TALLOC_CTX
*mem_ctx
,
532 struct ctdb_node_map
*ret
;
534 /* Allocate node map header */
535 ret
= talloc_zero_size(mem_ctx
, offsetof(struct ctdb_node_map
, nodes
));
537 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
541 lines
= file_lines_load(nlist
, &nlines
, 0, mem_ctx
);
543 DEBUG(DEBUG_ERR
, ("Failed to read nodes file \"%s\"\n", nlist
));
546 while (nlines
> 0 && strcmp(lines
[nlines
-1], "") == 0) {
550 for (i
=0; i
< nlines
; i
++) {
555 /* strip leading spaces */
556 while((*node
== ' ') || (*node
== '\t')) {
559 if (strcmp(node
, "") == 0) {
563 /* A "deleted" node is a node that is
564 commented out in the nodes file. This is
565 used instead of removing a line, which
566 would cause subsequent nodes to change
568 flags
= NODE_FLAGS_DELETED
;
573 if (!node_map_add(mem_ctx
, node
, flags
, &ret
)) {
584 struct ctdb_node_map
*
585 ctdb_node_list_to_map(struct ctdb_node
**nodes
, uint32_t num_nodes
,
590 struct ctdb_node_map
*node_map
;
592 size
= offsetof(struct ctdb_node_map
, nodes
) +
593 num_nodes
* sizeof(struct ctdb_node_and_flags
);
594 node_map
= (struct ctdb_node_map
*)talloc_zero_size(mem_ctx
, size
);
595 if (node_map
== NULL
) {
597 (__location__
" Failed to allocate nodemap array\n"));
601 node_map
->num
= num_nodes
;
602 for (i
=0; i
<num_nodes
; i
++) {
603 node_map
->nodes
[i
].addr
= nodes
[i
]->address
;
604 node_map
->nodes
[i
].pnn
= nodes
[i
]->pnn
;
605 node_map
->nodes
[i
].flags
= nodes
[i
]->flags
;
611 const char *ctdb_eventscript_call_names
[] = {
628 /* Runstate handling */
630 enum ctdb_runstate runstate
;
633 { CTDB_RUNSTATE_UNKNOWN
, "UNKNOWN" },
634 { CTDB_RUNSTATE_INIT
, "INIT" },
635 { CTDB_RUNSTATE_SETUP
, "SETUP" },
636 { CTDB_RUNSTATE_FIRST_RECOVERY
, "FIRST_RECOVERY" },
637 { CTDB_RUNSTATE_STARTUP
, "STARTUP" },
638 { CTDB_RUNSTATE_RUNNING
, "RUNNING" },
639 { CTDB_RUNSTATE_SHUTDOWN
, "SHUTDOWN" },
643 const char *runstate_to_string(enum ctdb_runstate runstate
)
646 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
647 if (runstate_map
[i
].runstate
== runstate
) {
648 return runstate_map
[i
].label
;
652 return runstate_map
[0].label
;
655 enum ctdb_runstate
runstate_from_string(const char *label
)
658 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
659 if (strcasecmp(runstate_map
[i
].label
, label
) == 0) {
660 return runstate_map
[i
].runstate
;
664 return CTDB_RUNSTATE_UNKNOWN
;
667 void ctdb_set_runstate(struct ctdb_context
*ctdb
, enum ctdb_runstate runstate
)
669 if (runstate
<= ctdb
->runstate
) {
670 ctdb_fatal(ctdb
, "runstate must always increase");
673 DEBUG(DEBUG_NOTICE
,("Set runstate to %s (%d)\n",
674 runstate_to_string(runstate
), runstate
));
675 ctdb
->runstate
= runstate
;
678 /* Convert arbitrary data to 4-byte boundary padded uint32 array */
679 uint32_t *ctdb_key_to_idkey(TALLOC_CTX
*mem_ctx
, TDB_DATA key
)
681 uint32_t idkey_size
, *k
;
683 idkey_size
= 1 + (key
.dsize
+ sizeof(uint32_t)-1) / sizeof(uint32_t);
685 k
= talloc_zero_array(mem_ctx
, uint32_t, idkey_size
);
691 memcpy(&k
[1], key
.dptr
, key
.dsize
);