4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "system/shmem.h"
26 #include "../include/ctdb_private.h"
28 int LogLevel
= DEBUG_NOTICE
;
29 int this_log_level
= 0;
32 return error string for last error
34 const char *ctdb_errstr(struct ctdb_context
*ctdb
)
41 remember an error message
43 void ctdb_set_error(struct ctdb_context
*ctdb
, const char *fmt
, ...)
46 talloc_free(ctdb
->err_msg
);
48 ctdb
->err_msg
= talloc_vasprintf(ctdb
, fmt
, ap
);
49 DEBUG(DEBUG_ERR
,("ctdb error: %s\n", ctdb
->err_msg
));
54 a fatal internal error occurred - no hope for recovery
56 void ctdb_fatal(struct ctdb_context
*ctdb
, const char *msg
)
58 DEBUG(DEBUG_ALERT
,("ctdb fatal error: %s\n", msg
));
63 like ctdb_fatal() but a core/backtrace would not be useful
65 void ctdb_die(struct ctdb_context
*ctdb
, const char *msg
)
67 DEBUG(DEBUG_ALERT
,("ctdb exiting with error: %s\n", msg
));
71 /* Invoke an external program to do some sort of tracing on the CTDB
72 * process. This might block for a little while. The external
73 * program is specified by the environment variable
74 * CTDB_EXTERNAL_TRACE. This program should take one argument: the
75 * pid of the process to trace. Commonly, the program would be a
76 * wrapper script around gcore.
78 void ctdb_external_trace(void)
81 const char * t
= getenv("CTDB_EXTERNAL_TRACE");
88 cmd
= talloc_asprintf(NULL
, "%s %lu", t
, (unsigned long) getpid());
89 DEBUG(DEBUG_WARNING
,("begin external trace: %s\n", cmd
));
91 DEBUG(DEBUG_WARNING
,("end external trace: %s\n", cmd
));
98 int ctdb_parse_address(struct ctdb_context
*ctdb
,
99 TALLOC_CTX
*mem_ctx
, const char *str
,
100 struct ctdb_address
*address
)
105 se
= getservbyname("ctdb", "tcp");
108 address
->address
= talloc_strdup(mem_ctx
, str
);
109 CTDB_NO_MEMORY(ctdb
, address
->address
);
112 address
->port
= CTDB_PORT
;
114 address
->port
= ntohs(se
->s_port
);
121 check if two addresses are the same
123 bool ctdb_same_address(struct ctdb_address
*a1
, struct ctdb_address
*a2
)
125 return strcmp(a1
->address
, a2
->address
) == 0 && a1
->port
== a2
->port
;
130 hash function for mapping data to a VNN - taken from tdb
132 uint32_t ctdb_hash(const TDB_DATA
*key
)
134 return tdb_jenkins_hash(discard_const(key
));
138 a type checking variant of idr_find
140 static void *_idr_find_type(struct idr_context
*idp
, int id
, const char *type
, const char *location
)
142 void *p
= idr_find(idp
, id
);
143 if (p
&& talloc_check_name(p
, type
) == NULL
) {
144 DEBUG(DEBUG_ERR
,("%s idr_find_type expected type %s but got %s\n",
145 location
, type
, talloc_get_name(p
)));
151 uint32_t ctdb_reqid_new(struct ctdb_context
*ctdb
, void *state
)
153 int id
= idr_get_new_above(ctdb
->idr
, state
, ctdb
->lastid
+1, INT_MAX
);
155 DEBUG(DEBUG_DEBUG
, ("Reqid wrap!\n"));
156 id
= idr_get_new(ctdb
->idr
, state
, INT_MAX
);
162 void *_ctdb_reqid_find(struct ctdb_context
*ctdb
, uint32_t reqid
, const char *type
, const char *location
)
166 p
= _idr_find_type(ctdb
->idr
, reqid
, type
, location
);
168 DEBUG(DEBUG_WARNING
, ("Could not find idr:%u\n",reqid
));
175 void ctdb_reqid_remove(struct ctdb_context
*ctdb
, uint32_t reqid
)
179 ret
= idr_remove(ctdb
->idr
, reqid
);
181 DEBUG(DEBUG_ERR
, ("Removing idr that does not exist\n"));
187 form a ctdb_rec_data record from a key/data pair
189 note that header may be NULL. If not NULL then it is included in the data portion
192 struct ctdb_rec_data
*ctdb_marshall_record(TALLOC_CTX
*mem_ctx
, uint32_t reqid
,
194 struct ctdb_ltdb_header
*header
,
198 struct ctdb_rec_data
*d
;
200 length
= offsetof(struct ctdb_rec_data
, data
) + key
.dsize
+
201 data
.dsize
+ (header
?sizeof(*header
):0);
202 d
= (struct ctdb_rec_data
*)talloc_size(mem_ctx
, length
);
208 d
->keylen
= key
.dsize
;
209 memcpy(&d
->data
[0], key
.dptr
, key
.dsize
);
211 d
->datalen
= data
.dsize
+ sizeof(*header
);
212 memcpy(&d
->data
[key
.dsize
], header
, sizeof(*header
));
213 memcpy(&d
->data
[key
.dsize
+sizeof(*header
)], data
.dptr
, data
.dsize
);
215 d
->datalen
= data
.dsize
;
216 memcpy(&d
->data
[key
.dsize
], data
.dptr
, data
.dsize
);
222 /* helper function for marshalling multiple records */
223 struct ctdb_marshall_buffer
*ctdb_marshall_add(TALLOC_CTX
*mem_ctx
,
224 struct ctdb_marshall_buffer
*m
,
228 struct ctdb_ltdb_header
*header
,
231 struct ctdb_rec_data
*r
;
232 size_t m_size
, r_size
;
233 struct ctdb_marshall_buffer
*m2
;
235 r
= ctdb_marshall_record(mem_ctx
, reqid
, key
, header
, data
);
242 m
= talloc_zero_size(mem_ctx
, offsetof(struct ctdb_marshall_buffer
, data
));
249 m_size
= talloc_get_size(m
);
250 r_size
= talloc_get_size(r
);
252 m2
= talloc_realloc_size(mem_ctx
, m
, m_size
+ r_size
);
258 memcpy(m_size
+ (uint8_t *)m2
, r
, r_size
);
267 /* we've finished marshalling, return a data blob with the marshalled records */
268 TDB_DATA
ctdb_marshall_finish(struct ctdb_marshall_buffer
*m
)
271 data
.dptr
= (uint8_t *)m
;
272 data
.dsize
= talloc_get_size(m
);
277 loop over a marshalling buffer
279 - pass r==NULL to start
280 - loop the number of times indicated by m->count
282 struct ctdb_rec_data
*ctdb_marshall_loop_next(struct ctdb_marshall_buffer
*m
, struct ctdb_rec_data
*r
,
284 struct ctdb_ltdb_header
*header
,
285 TDB_DATA
*key
, TDB_DATA
*data
)
288 r
= (struct ctdb_rec_data
*)&m
->data
[0];
290 r
= (struct ctdb_rec_data
*)(r
->length
+ (uint8_t *)r
);
298 key
->dptr
= &r
->data
[0];
299 key
->dsize
= r
->keylen
;
302 data
->dptr
= &r
->data
[r
->keylen
];
303 data
->dsize
= r
->datalen
;
304 if (header
!= NULL
) {
305 data
->dptr
+= sizeof(*header
);
306 data
->dsize
-= sizeof(*header
);
310 if (header
!= NULL
) {
311 if (r
->datalen
< sizeof(*header
)) {
314 *header
= *(struct ctdb_ltdb_header
*)&r
->data
[r
->keylen
];
326 #include <procinfo.h>
330 if possible, make this task real time
332 void ctdb_set_scheduler(struct ctdb_context
*ctdb
)
335 #if HAVE_THREAD_SETSCHED
336 struct thrdentry64 te
;
340 if (getthrds64(getpid(), &te
, sizeof(te
), &ti
, 1) != 1) {
341 DEBUG(DEBUG_ERR
, ("Unable to get thread information\n"));
345 if (ctdb
->saved_scheduler_param
== NULL
) {
346 ctdb
->saved_scheduler_param
= talloc_size(ctdb
, sizeof(te
));
348 *(struct thrdentry64
*)ctdb
->saved_scheduler_param
= te
;
350 if (thread_setsched(te
.ti_tid
, 0, SCHED_RR
) == -1) {
351 DEBUG(DEBUG_ERR
, ("Unable to set scheduler to SCHED_RR (%s)\n",
354 DEBUG(DEBUG_NOTICE
, ("Set scheduler to SCHED_RR\n"));
358 #if HAVE_SCHED_SETSCHEDULER
359 struct sched_param p
;
360 if (ctdb
->saved_scheduler_param
== NULL
) {
361 ctdb
->saved_scheduler_param
= talloc_size(ctdb
, sizeof(p
));
364 if (sched_getparam(0, (struct sched_param
*)ctdb
->saved_scheduler_param
) == -1) {
365 DEBUG(DEBUG_ERR
,("Unable to get old scheduler params\n"));
369 p
= *(struct sched_param
*)ctdb
->saved_scheduler_param
;
370 p
.sched_priority
= 1;
372 if (sched_setscheduler(0, SCHED_FIFO
, &p
) == -1) {
373 DEBUG(DEBUG_CRIT
,("Unable to set scheduler to SCHED_FIFO (%s)\n",
376 DEBUG(DEBUG_NOTICE
,("Set scheduler to SCHED_FIFO\n"));
383 restore previous scheduler parameters
385 void ctdb_restore_scheduler(struct ctdb_context
*ctdb
)
388 #if HAVE_THREAD_SETSCHED
389 struct thrdentry64 te
, *saved
;
393 if (getthrds64(getpid(), &te
, sizeof(te
), &ti
, 1) != 1) {
394 ctdb_fatal(ctdb
, "Unable to get thread information\n");
396 if (ctdb
->saved_scheduler_param
== NULL
) {
397 ctdb_fatal(ctdb
, "No saved scheduler parameters\n");
399 saved
= (struct thrdentry64
*)ctdb
->saved_scheduler_param
;
400 if (thread_setsched(te
.ti_tid
, saved
->ti_pri
, saved
->ti_policy
) == -1) {
401 ctdb_fatal(ctdb
, "Unable to restore old scheduler parameters\n");
405 #if HAVE_SCHED_SETSCHEDULER
406 if (ctdb
->saved_scheduler_param
== NULL
) {
407 ctdb_fatal(ctdb
, "No saved scheduler parameters\n");
409 if (sched_setscheduler(0, SCHED_OTHER
, (struct sched_param
*)ctdb
->saved_scheduler_param
) == -1) {
410 ctdb_fatal(ctdb
, "Unable to restore old scheduler parameters\n");
416 void set_nonblocking(int fd
)
420 v
= fcntl(fd
, F_GETFL
, 0);
422 DEBUG(DEBUG_WARNING
, ("Failed to get file status flags - %s\n",
426 if (fcntl(fd
, F_SETFL
, v
| O_NONBLOCK
) == -1) {
427 DEBUG(DEBUG_WARNING
, ("Failed to set non_blocking on fd - %s\n",
432 void set_close_on_exec(int fd
)
436 v
= fcntl(fd
, F_GETFD
, 0);
438 DEBUG(DEBUG_WARNING
, ("Failed to get file descriptor flags - %s\n",
442 if (fcntl(fd
, F_SETFD
, v
| FD_CLOEXEC
) != 0) {
443 DEBUG(DEBUG_WARNING
, ("Failed to set close_on_exec on fd - %s\n",
449 bool parse_ipv4(const char *s
, unsigned port
, struct sockaddr_in
*sin
)
451 sin
->sin_family
= AF_INET
;
452 sin
->sin_port
= htons(port
);
454 if (inet_pton(AF_INET
, s
, &sin
->sin_addr
) != 1) {
455 DEBUG(DEBUG_ERR
, (__location__
" Failed to translate %s into sin_addr\n", s
));
462 static bool parse_ipv6(const char *s
, const char *ifaces
, unsigned port
, ctdb_sock_addr
*saddr
)
464 saddr
->ip6
.sin6_family
= AF_INET6
;
465 saddr
->ip6
.sin6_port
= htons(port
);
466 saddr
->ip6
.sin6_flowinfo
= 0;
467 saddr
->ip6
.sin6_scope_id
= 0;
469 if (inet_pton(AF_INET6
, s
, &saddr
->ip6
.sin6_addr
) != 1) {
470 DEBUG(DEBUG_ERR
, (__location__
" Failed to translate %s into sin6_addr\n", s
));
474 if (ifaces
&& IN6_IS_ADDR_LINKLOCAL(&saddr
->ip6
.sin6_addr
)) {
475 if (strchr(ifaces
, ',')) {
476 DEBUG(DEBUG_ERR
, (__location__
" Link local address %s "
477 "is specified for multiple ifaces %s\n",
481 saddr
->ip6
.sin6_scope_id
= if_nametoindex(ifaces
);
489 bool parse_ip_port(const char *addr
, ctdb_sock_addr
*saddr
)
491 TALLOC_CTX
*tmp_ctx
= talloc_new(NULL
);
497 s
= talloc_strdup(tmp_ctx
, addr
);
499 DEBUG(DEBUG_ERR
, (__location__
" Failed strdup()\n"));
500 talloc_free(tmp_ctx
);
506 DEBUG(DEBUG_ERR
, (__location__
" This addr: %s does not contain a port number\n", s
));
507 talloc_free(tmp_ctx
);
511 port
= strtoul(p
+1, &endp
, 10);
512 if (endp
== NULL
|| *endp
!= 0) {
513 /* trailing garbage */
514 DEBUG(DEBUG_ERR
, (__location__
" Trailing garbage after the port in %s\n", s
));
515 talloc_free(tmp_ctx
);
521 /* now, is this an IPv4 or an IPv6 address? */
522 ret
= parse_ip(s
, NULL
, port
, saddr
);
524 talloc_free(tmp_ctx
);
531 bool parse_ip(const char *addr
, const char *ifaces
, unsigned port
, ctdb_sock_addr
*saddr
)
536 ZERO_STRUCTP(saddr
); /* valgrind :-) */
538 /* now, is this an IPv4 or an IPv6 address? */
539 p
= index(addr
, ':');
541 ret
= parse_ipv4(addr
, port
, &saddr
->ip
);
543 ret
= parse_ipv6(addr
, ifaces
, port
, saddr
);
552 bool parse_ip_mask(const char *str
, const char *ifaces
, ctdb_sock_addr
*addr
, unsigned *mask
)
554 TALLOC_CTX
*tmp_ctx
= talloc_new(NULL
);
560 s
= talloc_strdup(tmp_ctx
, str
);
562 DEBUG(DEBUG_ERR
, (__location__
" Failed strdup()\n"));
563 talloc_free(tmp_ctx
);
569 DEBUG(DEBUG_ERR
, (__location__
" This addr: %s does not contain a mask\n", s
));
570 talloc_free(tmp_ctx
);
574 *mask
= strtoul(p
+1, &endp
, 10);
575 if (endp
== NULL
|| *endp
!= 0) {
576 /* trailing garbage */
577 DEBUG(DEBUG_ERR
, (__location__
" Trailing garbage after the mask in %s\n", s
));
578 talloc_free(tmp_ctx
);
584 /* now, is this an IPv4 or an IPv6 address? */
585 ret
= parse_ip(s
, ifaces
, 0, addr
);
587 talloc_free(tmp_ctx
);
592 This is used to canonicalize a ctdb_sock_addr structure.
594 void ctdb_canonicalize_ip(const ctdb_sock_addr
*ip
, ctdb_sock_addr
*cip
)
596 char prefix
[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
598 memcpy(cip
, ip
, sizeof (*cip
));
600 if ( (ip
->sa
.sa_family
== AF_INET6
)
601 && !memcmp(&ip
->ip6
.sin6_addr
, prefix
, 12)) {
602 memset(cip
, 0, sizeof(*cip
));
603 #ifdef HAVE_SOCK_SIN_LEN
604 cip
->ip
.sin_len
= sizeof(*cip
);
606 cip
->ip
.sin_family
= AF_INET
;
607 cip
->ip
.sin_port
= ip
->ip6
.sin6_port
;
608 memcpy(&cip
->ip
.sin_addr
, &ip
->ip6
.sin6_addr
.s6_addr
[12], 4);
612 bool ctdb_same_ip(const ctdb_sock_addr
*tip1
, const ctdb_sock_addr
*tip2
)
614 ctdb_sock_addr ip1
, ip2
;
616 ctdb_canonicalize_ip(tip1
, &ip1
);
617 ctdb_canonicalize_ip(tip2
, &ip2
);
619 if (ip1
.sa
.sa_family
!= ip2
.sa
.sa_family
) {
623 switch (ip1
.sa
.sa_family
) {
625 return ip1
.ip
.sin_addr
.s_addr
== ip2
.ip
.sin_addr
.s_addr
;
627 return !memcmp(&ip1
.ip6
.sin6_addr
.s6_addr
[0],
628 &ip2
.ip6
.sin6_addr
.s6_addr
[0],
631 DEBUG(DEBUG_ERR
, (__location__
" CRITICAL Can not compare sockaddr structures of type %u\n", ip1
.sa
.sa_family
));
639 compare two ctdb_sock_addr structures
641 bool ctdb_same_sockaddr(const ctdb_sock_addr
*ip1
, const ctdb_sock_addr
*ip2
)
643 return ctdb_same_ip(ip1
, ip2
) && ip1
->ip
.sin_port
== ip2
->ip
.sin_port
;
646 char *ctdb_addr_to_str(ctdb_sock_addr
*addr
)
648 static char cip
[128] = "";
650 switch (addr
->sa
.sa_family
) {
652 inet_ntop(addr
->ip
.sin_family
, &addr
->ip
.sin_addr
, cip
, sizeof(cip
));
655 inet_ntop(addr
->ip6
.sin6_family
, &addr
->ip6
.sin6_addr
, cip
, sizeof(cip
));
658 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
659 ctdb_external_trace();
665 unsigned ctdb_addr_to_port(ctdb_sock_addr
*addr
)
667 switch (addr
->sa
.sa_family
) {
669 return ntohs(addr
->ip
.sin_port
);
672 return ntohs(addr
->ip6
.sin6_port
);
675 DEBUG(DEBUG_ERR
, (__location__
" ERROR, unknown family %u\n", addr
->sa
.sa_family
));
681 void ctdb_block_signal(int signum
)
685 sigaddset(&set
,signum
);
686 sigprocmask(SIG_BLOCK
,&set
,NULL
);
689 void ctdb_unblock_signal(int signum
)
693 sigaddset(&set
,signum
);
694 sigprocmask(SIG_UNBLOCK
,&set
,NULL
);
697 struct debug_levels debug_levels
[] = {
698 {DEBUG_EMERG
, "EMERG"},
699 {DEBUG_ALERT
, "ALERT"},
700 {DEBUG_CRIT
, "CRIT"},
702 {DEBUG_WARNING
, "WARNING"},
703 {DEBUG_NOTICE
, "NOTICE"},
704 {DEBUG_INFO
, "INFO"},
705 {DEBUG_DEBUG
, "DEBUG"},
709 const char *get_debug_by_level(int32_t level
)
713 for (i
=0; debug_levels
[i
].description
!= NULL
; i
++) {
714 if (debug_levels
[i
].level
== level
) {
715 return debug_levels
[i
].description
;
721 int32_t get_debug_by_desc(const char *desc
)
725 for (i
=0; debug_levels
[i
].description
!= NULL
; i
++) {
726 if (!strcasecmp(debug_levels
[i
].description
, desc
)) {
727 return debug_levels
[i
].level
;
734 /* we don't lock future pages here; it would increase the chance that
735 * we'd fail to mmap later on. */
736 void ctdb_lockdown_memory(struct ctdb_context
*ctdb
)
738 #if defined(HAVE_MLOCKALL) && !defined(_AIX_)
739 /* Extra stack, please! */
741 memset(dummy
, 0, sizeof(dummy
));
743 if (ctdb
->valgrinding
) {
747 /* Ignore when running in local daemons mode */
752 /* Avoid compiler optimizing out dummy. */
753 mlock(dummy
, sizeof(dummy
));
754 if (mlockall(MCL_CURRENT
) != 0) {
755 DEBUG(DEBUG_WARNING
,("Failed to lockdown memory: %s'\n",
761 const char *ctdb_eventscript_call_names
[] = {
778 /* Runstate handling */
780 enum ctdb_runstate runstate
;
783 { CTDB_RUNSTATE_UNKNOWN
, "UNKNOWN" },
784 { CTDB_RUNSTATE_INIT
, "INIT" },
785 { CTDB_RUNSTATE_SETUP
, "SETUP" },
786 { CTDB_RUNSTATE_FIRST_RECOVERY
, "FIRST_RECOVERY" },
787 { CTDB_RUNSTATE_STARTUP
, "STARTUP" },
788 { CTDB_RUNSTATE_RUNNING
, "RUNNING" },
789 { CTDB_RUNSTATE_SHUTDOWN
, "SHUTDOWN" },
793 const char *runstate_to_string(enum ctdb_runstate runstate
)
796 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
797 if (runstate_map
[i
].runstate
== runstate
) {
798 return runstate_map
[i
].label
;
802 return runstate_map
[0].label
;
805 enum ctdb_runstate
runstate_from_string(const char *label
)
808 for (i
=0; runstate_map
[i
].label
!= NULL
; i
++) {
809 if (strcasecmp(runstate_map
[i
].label
, label
) == 0) {
810 return runstate_map
[i
].runstate
;
814 return CTDB_RUNSTATE_UNKNOWN
;
817 void ctdb_set_runstate(struct ctdb_context
*ctdb
, enum ctdb_runstate runstate
)
819 if (runstate
<= ctdb
->runstate
) {
820 ctdb_fatal(ctdb
, "runstate must always increase");
823 DEBUG(DEBUG_NOTICE
,("Set runstate to %s (%d)\n",
824 runstate_to_string(runstate
), runstate
));
825 ctdb
->runstate
= runstate
;
828 void ctdb_mkdir_p_or_die(struct ctdb_context
*ctdb
, const char *dir
, int mode
)
832 ret
= mkdir_p(dir
, mode
);
835 ("ctdb exiting with error: "
836 "failed to create directory \"%s\" (%s)\n",
837 dir
, strerror(errno
)));