4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "system/shmem.h"
26 #include "../include/ctdb_private.h"
28 int LogLevel
= DEBUG_NOTICE
;
29 int this_log_level
= 0;
32 return error string for last error
34 const char *ctdb_errstr(struct ctdb_context
*ctdb
)
41 remember an error message
43 void ctdb_set_error(struct ctdb_context
*ctdb
, const char *fmt
, ...)
46 talloc_free(ctdb
->err_msg
);
48 ctdb
->err_msg
= talloc_vasprintf(ctdb
, fmt
, ap
);
49 DEBUG(DEBUG_ERR
,("ctdb error: %s\n", ctdb
->err_msg
));
54 a fatal internal error occurred - no hope for recovery
56 void ctdb_fatal(struct ctdb_context
*ctdb
, const char *msg
)
58 DEBUG(DEBUG_ALERT
,("ctdb fatal error: %s\n", msg
));
63 like ctdb_fatal() but a core/backtrace would not be useful
65 void ctdb_die(struct ctdb_context
*ctdb
, const char *msg
)
67 DEBUG(DEBUG_ALERT
,("ctdb exiting with error: %s\n", msg
));
71 /* Invoke an external program to do some sort of tracing on the CTDB
72 * process. This might block for a little while. The external
73 * program is specified by the environment variable
74 * CTDB_EXTERNAL_TRACE. This program should take one argument: the
75 * pid of the process to trace. Commonly, the program would be a
76 * wrapper script around gcore.
78 void ctdb_external_trace(void)
81 const char * t
= getenv("CTDB_EXTERNAL_TRACE");
88 cmd
= talloc_asprintf(NULL
, "%s %lu", t
, (unsigned long) getpid());
89 DEBUG(DEBUG_WARNING
,("begin external trace: %s\n", cmd
));
91 DEBUG(DEBUG_WARNING
,("end external trace: %s\n", cmd
));
98 int ctdb_parse_address(struct ctdb_context
*ctdb
,
99 TALLOC_CTX
*mem_ctx
, const char *str
,
100 struct ctdb_address
*address
)
105 se
= getservbyname("ctdb", "tcp");
108 address
->address
= talloc_strdup(mem_ctx
, str
);
109 CTDB_NO_MEMORY(ctdb
, address
->address
);
112 address
->port
= CTDB_PORT
;
114 address
->port
= ntohs(se
->s_port
);
121 check if two addresses are the same
123 bool ctdb_same_address(struct ctdb_address
*a1
, struct ctdb_address
*a2
)
125 return strcmp(a1
->address
, a2
->address
) == 0 && a1
->port
== a2
->port
;
130 hash function for mapping data to a VNN - taken from tdb
132 uint32_t ctdb_hash(const TDB_DATA
*key
)
134 return tdb_jenkins_hash(discard_const(key
));
138 a type checking variant of idr_find
140 static void *_idr_find_type(struct idr_context
*idp
, int id
, const char *type
, const char *location
)
142 void *p
= idr_find(idp
, id
);
143 if (p
&& talloc_check_name(p
, type
) == NULL
) {
144 DEBUG(DEBUG_ERR
,("%s idr_find_type expected type %s but got %s\n",
145 location
, type
, talloc_get_name(p
)));
151 uint32_t ctdb_reqid_new(struct ctdb_context
*ctdb
, void *state
)
153 int id
= idr_get_new_above(ctdb
->idr
, state
, ctdb
->lastid
+1, INT_MAX
);
155 DEBUG(DEBUG_DEBUG
, ("Reqid wrap!\n"));
156 id
= idr_get_new(ctdb
->idr
, state
, INT_MAX
);
162 void *_ctdb_reqid_find(struct ctdb_context
*ctdb
, uint32_t reqid
, const char *type
, const char *location
)
166 p
= _idr_find_type(ctdb
->idr
, reqid
, type
, location
);
168 DEBUG(DEBUG_WARNING
, ("Could not find idr:%u\n",reqid
));
175 void ctdb_reqid_remove(struct ctdb_context
*ctdb
, uint32_t reqid
)
179 ret
= idr_remove(ctdb
->idr
, reqid
);
181 DEBUG(DEBUG_ERR
, ("Removing idr that does not exist\n"));
187 form a ctdb_rec_data record from a key/data pair
189 note that header may be NULL. If not NULL then it is included in the data portion
192 struct ctdb_rec_data
*ctdb_marshall_record(TALLOC_CTX
*mem_ctx
, uint32_t reqid
,
194 struct ctdb_ltdb_header
*header
,
198 struct ctdb_rec_data
*d
;
200 length
= offsetof(struct ctdb_rec_data
, data
) + key
.dsize
+
201 data
.dsize
+ (header
?sizeof(*header
):0);
202 d
= (struct ctdb_rec_data
*)talloc_size(mem_ctx
, length
);
208 d
->keylen
= key
.dsize
;
209 memcpy(&d
->data
[0], key
.dptr
, key
.dsize
);
211 d
->datalen
= data
.dsize
+ sizeof(*header
);
212 memcpy(&d
->data
[key
.dsize
], header
, sizeof(*header
));
213 memcpy(&d
->data
[key
.dsize
+sizeof(*header
)], data
.dptr
, data
.dsize
);
215 d
->datalen
= data
.dsize
;
216 memcpy(&d
->data
[key
.dsize
], data
.dptr
, data
.dsize
);
222 /* helper function for marshalling multiple records */
223 struct ctdb_marshall_buffer
*ctdb_marshall_add(TALLOC_CTX
*mem_ctx
,
224 struct ctdb_marshall_buffer
*m
,
228 struct ctdb_ltdb_header
*header
,
231 struct ctdb_rec_data
*r
;
232 size_t m_size
, r_size
;
233 struct ctdb_marshall_buffer
*m2
;
235 r
= ctdb_marshall_record(mem_ctx
, reqid
, key
, header
, data
);
242 m
= talloc_zero_size(mem_ctx
, offsetof(struct ctdb_marshall_buffer
, data
));
249 m_size
= talloc_get_size(m
);
250 r_size
= talloc_get_size(r
);
252 m2
= talloc_realloc_size(mem_ctx
, m
, m_size
+ r_size
);
258 memcpy(m_size
+ (uint8_t *)m2
, r
, r_size
);
267 /* we've finished marshalling, return a data blob with the marshalled records */
268 TDB_DATA
ctdb_marshall_finish(struct ctdb_marshall_buffer
*m
)
271 data
.dptr
= (uint8_t *)m
;
272 data
.dsize
= talloc_get_size(m
);
277 loop over a marshalling buffer
279 - pass r==NULL to start
280 - loop the number of times indicated by m->count
282 struct ctdb_rec_data
*ctdb_marshall_loop_next(struct ctdb_marshall_buffer
*m
, struct ctdb_rec_data
*r
,
284 struct ctdb_ltdb_header
*header
,
285 TDB_DATA
*key
, TDB_DATA
*data
)
288 r
= (struct ctdb_rec_data
*)&m
->data
[0];
290 r
= (struct ctdb_rec_data
*)(r
->length
+ (uint8_t *)r
);
298 key
->dptr
= &r
->data
[0];
299 key
->dsize
= r
->keylen
;
302 data
->dptr
= &r
->data
[r
->keylen
];
303 data
->dsize
= r
->datalen
;
304 if (header
!= NULL
) {
305 data
->dptr
+= sizeof(*header
);
306 data
->dsize
-= sizeof(*header
);
310 if (header
!= NULL
) {
311 if (r
->datalen
< sizeof(*header
)) {
314 *header
= *(struct ctdb_ltdb_header
*)&r
->data
[r
->keylen
];
326 #include <procinfo.h>
330 if possible, make this task real time
332 void ctdb_set_scheduler(struct ctdb_context
*ctdb
)
335 #if HAVE_THREAD_SETSCHED
336 struct thrdentry64 te
;
340 if (getthrds64(getpid(), &te
, sizeof(te
), &ti
, 1) != 1) {
341 DEBUG(DEBUG_ERR
, ("Unable to get thread information\n"));
345 if (thread_setsched(te
.ti_tid
, 0, SCHED_RR
) == -1) {
346 DEBUG(DEBUG_ERR
, ("Unable to set scheduler to SCHED_RR (%s)\n",
349 DEBUG(DEBUG_NOTICE
, ("Set scheduler to SCHED_RR\n"));
353 #if HAVE_SCHED_SETSCHEDULER
354 struct sched_param p
;
356 p
.sched_priority
= 1;
358 if (sched_setscheduler(0, SCHED_FIFO
, &p
) == -1) {
359 DEBUG(DEBUG_CRIT
,("Unable to set scheduler to SCHED_FIFO (%s)\n",
362 DEBUG(DEBUG_NOTICE
,("Set scheduler to SCHED_FIFO\n"));
369 restore previous scheduler parameters
371 void ctdb_restore_scheduler(struct ctdb_context
*ctdb
)
374 #if HAVE_THREAD_SETSCHED
375 struct thrdentry64 te
;
379 if (getthrds64(getpid(), &te
, sizeof(te
), &ti
, 1) != 1) {
380 ctdb_fatal(ctdb
, "Unable to get thread information\n");
382 if (thread_setsched(te
.ti_tid
, 0, SCHED_OTHER
) == -1) {
383 ctdb_fatal(ctdb
, "Unable to set scheduler to SCHED_OTHER\n");
387 #if HAVE_SCHED_SETSCHEDULER
388 struct sched_param p
;
390 p
.sched_priority
= 0;
391 if (sched_setscheduler(0, SCHED_OTHER
, &p
) == -1) {
392 ctdb_fatal(ctdb
, "Unable to set scheduler to SCHED_OTHER\n");
398 void set_nonblocking(int fd
)
402 v
= fcntl(fd
, F_GETFL
, 0);
404 DEBUG(DEBUG_WARNING
, ("Failed to get file status flags - %s\n",
408 if (fcntl(fd
, F_SETFL
, v
| O_NONBLOCK
) == -1) {
409 DEBUG(DEBUG_WARNING
, ("Failed to set non_blocking on fd - %s\n",
414 void set_close_on_exec(int fd
)
418 v
= fcntl(fd
, F_GETFD
, 0);
420 DEBUG(DEBUG_WARNING
, ("Failed to get file descriptor flags - %s\n",
424 if (fcntl(fd
, F_SETFD
, v
| FD_CLOEXEC
) != 0) {
425 DEBUG(DEBUG_WARNING
, ("Failed to set close_on_exec on fd - %s\n",
431 bool parse_ipv4(const char *s
, unsigned port
, struct sockaddr_in
*sin
)
433 sin
->sin_family
= AF_INET
;
434 sin
->sin_port
= htons(port
);
436 if (inet_pton(AF_INET
, s
, &sin
->sin_addr
) != 1) {
437 DEBUG(DEBUG_ERR
, (__location__
" Failed to translate %s into sin_addr\n", s
));
444 static bool parse_ipv6(const char *s
, const char *ifaces
, unsigned port
, ctdb_sock_addr
*saddr
)
446 saddr
->ip6
.sin6_family
= AF_INET6
;
447 saddr
->ip6
.sin6_port
= htons(port
);
448 saddr
->ip6
.sin6_flowinfo
= 0;
449 saddr
->ip6
.sin6_scope_id
= 0;
451 if (inet_pton(AF_INET6
, s
, &saddr
->ip6
.sin6_addr
) != 1) {
452 DEBUG(DEBUG_ERR
, (__location__
" Failed to translate %s into sin6_addr\n", s
));
456 if (ifaces
&& IN6_IS_ADDR_LINKLOCAL(&saddr
->ip6
.sin6_addr
)) {
457 if (strchr(ifaces
, ',')) {
458 DEBUG(DEBUG_ERR
, (__location__
" Link local address %s "
459 "is specified for multiple ifaces %s\n",
463 saddr
->ip6
.sin6_scope_id
= if_nametoindex(ifaces
);
471 bool parse_ip_port(const char *addr
, ctdb_sock_addr
*saddr
)
473 TALLOC_CTX
*tmp_ctx
= talloc_new(NULL
);
479 s
= talloc_strdup(tmp_ctx
, addr
);
481 DEBUG(DEBUG_ERR
, (__location__
" Failed strdup()\n"));
482 talloc_free(tmp_ctx
);
488 DEBUG(DEBUG_ERR
, (__location__
" This addr: %s does not contain a port number\n", s
));
489 talloc_free(tmp_ctx
);
493 port
= strtoul(p
+1, &endp
, 10);
494 if (endp
== NULL
|| *endp
!= 0) {
495 /* trailing garbage */
496 DEBUG(DEBUG_ERR
, (__location__
" Trailing garbage after the port in %s\n", s
));
497 talloc_free(tmp_ctx
);
503 /* now is this a ipv4 or ipv6 address ?*/
504 ret
= parse_ip(s
, NULL
, port
, saddr
);
506 talloc_free(tmp_ctx
);
513 bool parse_ip(const char *addr
, const char *ifaces
, unsigned port
, ctdb_sock_addr
*saddr
)
518 ZERO_STRUCTP(saddr
); /* valgrind :-) */
520 /* now is this a ipv4 or ipv6 address ?*/
521 p
= index(addr
, ':');
523 ret
= parse_ipv4(addr
, port
, &saddr
->ip
);
525 ret
= parse_ipv6(addr
, ifaces
, port
, saddr
);
534 bool parse_ip_mask(const char *str
, const char *ifaces
, ctdb_sock_addr
*addr
, unsigned *mask
)
536 TALLOC_CTX
*tmp_ctx
= talloc_new(NULL
);
542 s
= talloc_strdup(tmp_ctx
, str
);
544 DEBUG(DEBUG_ERR
, (__location__
" Failed strdup()\n"));
545 talloc_free(tmp_ctx
);
551 DEBUG(DEBUG_ERR
, (__location__
" This addr: %s does not contain a mask\n", s
));
552 talloc_free(tmp_ctx
);
556 *mask
= strtoul(p
+1, &endp
, 10);
557 if (endp
== NULL
|| *endp
!= 0) {
558 /* trailing garbage */
559 DEBUG(DEBUG_ERR
, (__location__
" Trailing garbage after the mask in %s\n", s
));
560 talloc_free(tmp_ctx
);
566 /* now is this a ipv4 or ipv6 address ?*/
567 ret
= parse_ip(s
, ifaces
, 0, addr
);
569 talloc_free(tmp_ctx
);
574 This is used to canonicalize a ctdb_sock_addr structure.
576 void ctdb_canonicalize_ip(const ctdb_sock_addr
*ip
, ctdb_sock_addr
*cip
)
578 char prefix
[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
580 memcpy(cip
, ip
, sizeof (*cip
));
582 if ( (ip
->sa
.sa_family
== AF_INET6
)
583 && !memcmp(&ip
->ip6
.sin6_addr
, prefix
, 12)) {
584 memset(cip
, 0, sizeof(*cip
));
585 #ifdef HAVE_SOCK_SIN_LEN
586 cip
->ip
.sin_len
= sizeof(*cip
);
588 cip
->ip
.sin_family
= AF_INET
;
589 cip
->ip
.sin_port
= ip
->ip6
.sin6_port
;
590 memcpy(&cip
->ip
.sin_addr
, &ip
->ip6
.sin6_addr
.s6_addr
[12], 4);
594 bool ctdb_same_ip(const ctdb_sock_addr
*tip1
, const ctdb_sock_addr
*tip2
)
596 ctdb_sock_addr ip1
, ip2
;
598 ctdb_canonicalize_ip(tip1
, &ip1
);
599 ctdb_canonicalize_ip(tip2
, &ip2
);
601 if (ip1
.sa
.sa_family
!= ip2
.sa
.sa_family
) {
605 switch (ip1
.sa
.sa_family
) {
607 return ip1
.ip
.sin_addr
.s_addr
== ip2
.ip
.sin_addr
.s_addr
;
609 return !memcmp(&ip1
.ip6
.sin6_addr
.s6_addr
[0],
610 &ip2
.ip6
.sin6_addr
.s6_addr
[0],
613 DEBUG(DEBUG_ERR
, (__location__
" CRITICAL Can not compare sockaddr structures of type %u\n", ip1
.sa
.sa_family
));
621 compare two ctdb_sock_addr structures
623 bool ctdb_same_sockaddr(const ctdb_sock_addr
*ip1
, const ctdb_sock_addr
*ip2
)
625 return ctdb_same_ip(ip1
, ip2
) && ip1
->ip
.sin_port
== ip2
->ip
.sin_port
;
628 char *ctdb_addr_to_str(ctdb_sock_addr
*addr
)
630 static char cip
[128] = "";
632 switch (addr
->sa
.sa_family
) {
634 inet_ntop(addr
->ip
.sin_family
, &addr
->ip
.sin_addr
, cip
, sizeof(cip
));
637 inet_ntop(addr
->ip6
.sin6_family
, &addr
->ip6
.sin6_addr
, cip
, sizeof(cip
));
640 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
641 ctdb_external_trace();
647 unsigned ctdb_addr_to_port(ctdb_sock_addr
*addr
)
649 switch (addr
->sa
.sa_family
) {
651 return ntohs(addr
->ip
.sin_port
);
654 return ntohs(addr
->ip6
.sin6_port
);
657 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
663 void ctdb_block_signal(int signum
)
667 sigaddset(&set
,signum
);
668 sigprocmask(SIG_BLOCK
,&set
,NULL
);
671 void ctdb_unblock_signal(int signum
)
675 sigaddset(&set
,signum
);
676 sigprocmask(SIG_UNBLOCK
,&set
,NULL
);
679 struct debug_levels debug_levels
[] = {
680 {DEBUG_EMERG
, "EMERG"},
681 {DEBUG_ALERT
, "ALERT"},
682 {DEBUG_CRIT
, "CRIT"},
684 {DEBUG_WARNING
, "WARNING"},
685 {DEBUG_NOTICE
, "NOTICE"},
686 {DEBUG_INFO
, "INFO"},
687 {DEBUG_DEBUG
, "DEBUG"},
691 const char *get_debug_by_level(int32_t level
)
695 for (i
=0; debug_levels
[i
].description
!= NULL
; i
++) {
696 if (debug_levels
[i
].level
== level
) {
697 return debug_levels
[i
].description
;
703 int32_t get_debug_by_desc(const char *desc
)
707 for (i
=0; debug_levels
[i
].description
!= NULL
; i
++) {
708 if (!strcasecmp(debug_levels
[i
].description
, desc
)) {
709 return debug_levels
[i
].level
;
716 /* we don't lock future pages here; it would increase the chance that
717 * we'd fail to mmap later on. */
718 void ctdb_lockdown_memory(struct ctdb_context
*ctdb
)
720 #if defined(HAVE_MLOCKALL) && !defined(_AIX_)
721 /* Extra stack, please! */
723 memset(dummy
, 0, sizeof(dummy
));
725 if (ctdb
->valgrinding
) {
729 /* Ignore when running in local daemons mode */
734 /* Avoid compiler optimizing out dummy. */
735 mlock(dummy
, sizeof(dummy
));
736 if (mlockall(MCL_CURRENT
) != 0) {
737 DEBUG(DEBUG_WARNING
,("Failed to lockdown memory: %s'\n",
743 const char *ctdb_eventscript_call_names
[] = {
760 /* Runstate handling */
762 enum ctdb_runstate runstate
;
765 { CTDB_RUNSTATE_UNKNOWN
, "UNKNOWN" },
766 { CTDB_RUNSTATE_INIT
, "INIT" },
767 { CTDB_RUNSTATE_SETUP
, "SETUP" },
768 { CTDB_RUNSTATE_FIRST_RECOVERY
, "FIRST_RECOVERY" },
769 { CTDB_RUNSTATE_STARTUP
, "STARTUP" },
770 { CTDB_RUNSTATE_RUNNING
, "RUNNING" },
771 { CTDB_RUNSTATE_SHUTDOWN
, "SHUTDOWN" },
775 const char *runstate_to_string(enum ctdb_runstate runstate
)
778 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
779 if (runstate_map
[i
].runstate
== runstate
) {
780 return runstate_map
[i
].label
;
784 return runstate_map
[0].label
;
787 enum ctdb_runstate
runstate_from_string(const char *label
)
790 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
791 if (strcasecmp(runstate_map
[i
].label
, label
) == 0) {
792 return runstate_map
[i
].runstate
;
796 return CTDB_RUNSTATE_UNKNOWN
;
799 void ctdb_set_runstate(struct ctdb_context
*ctdb
, enum ctdb_runstate runstate
)
801 if (runstate
<= ctdb
->runstate
) {
802 ctdb_fatal(ctdb
, "runstate must always increase");
805 DEBUG(DEBUG_NOTICE
,("Set runstate to %s (%d)\n",
806 runstate_to_string(runstate
), runstate
));
807 ctdb
->runstate
= runstate
;
810 void ctdb_mkdir_p_or_die(struct ctdb_context
*ctdb
, const char *dir
, int mode
)
814 ret
= mkdir_p(dir
, mode
);
817 ("ctdb exiting with error: "
818 "failed to create directory \"%s\" (%s)\n",
819 dir
, strerror(errno
)));