/*
   Copyright (C) Ronnie Sahlberg  2007
   Copyright (C) Andrew Tridgell  2007
   Copyright (C) Martin Schwenke  2011

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
25 #include "system/network.h"
27 #include "lib/util/debug.h"
29 #include "common/logging.h"
30 #include "common/rb_tree.h"
32 #include "protocol/protocol_api.h"
34 #include "server/ipalloc_private.h"
36 /* Initialise main ipalloc state and sub-structures */
37 struct ipalloc_state
*
38 ipalloc_state_init(TALLOC_CTX
*mem_ctx
,
40 enum ipalloc_algorithm algorithm
,
43 bool no_ip_host_on_all_disabled
,
44 uint32_t *force_rebalance_nodes
)
46 struct ipalloc_state
*ipalloc_state
=
47 talloc_zero(mem_ctx
, struct ipalloc_state
);
48 if (ipalloc_state
== NULL
) {
49 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
53 ipalloc_state
->num
= num_nodes
;
55 ipalloc_state
->noiphost
= bitmap_talloc(ipalloc_state
,
57 if (ipalloc_state
->noiphost
== NULL
) {
58 DEBUG(DEBUG_ERR
, (__location__
" Out of memory\n"));
62 ipalloc_state
->algorithm
= algorithm
;
63 ipalloc_state
->no_ip_takeover
= no_ip_takeover
;
64 ipalloc_state
->no_ip_failback
= no_ip_failback
;
65 ipalloc_state
->no_ip_host_on_all_disabled
= no_ip_host_on_all_disabled
;
66 ipalloc_state
->force_rebalance_nodes
= force_rebalance_nodes
;
70 talloc_free(ipalloc_state
);
74 static void *add_ip_callback(void *parm
, void *data
)
76 struct public_ip_list
*this_ip
= parm
;
77 struct public_ip_list
*prev_ip
= data
;
79 if (prev_ip
== NULL
) {
82 if (this_ip
->pnn
== -1) {
83 this_ip
->pnn
= prev_ip
->pnn
;
89 static int getips_count_callback(void *param
, void *data
)
91 struct public_ip_list
**ip_list
= (struct public_ip_list
**)param
;
92 struct public_ip_list
*new_ip
= (struct public_ip_list
*)data
;
94 new_ip
->next
= *ip_list
;
99 /* Nodes only know about those public addresses that they are
100 * configured to serve and no individual node has a full list of all
101 * public addresses configured across the cluster. Therefore, a
102 * merged list of all public addresses needs to be built so that IP
103 * allocation can be done. */
104 static struct public_ip_list
*
105 create_merged_ip_list(struct ipalloc_state
*ipalloc_state
)
108 struct public_ip_list
*ip_list
;
109 struct ctdb_public_ip_list
*public_ips
;
110 struct trbt_tree
*ip_tree
;
112 ip_tree
= trbt_create(ipalloc_state
, 0);
114 if (ipalloc_state
->known_public_ips
== NULL
) {
115 DEBUG(DEBUG_ERR
, ("Known public IPs not set\n"));
119 for (i
=0; i
< ipalloc_state
->num
; i
++) {
121 public_ips
= &ipalloc_state
->known_public_ips
[i
];
123 for (j
=0; j
< public_ips
->num
; j
++) {
124 struct public_ip_list
*tmp_ip
;
126 /* This is returned as part of ip_list */
127 tmp_ip
= talloc_zero(ipalloc_state
, struct public_ip_list
);
128 if (tmp_ip
== NULL
) {
130 (__location__
" out of memory\n"));
131 talloc_free(ip_tree
);
135 /* Do not use information about IP addresses hosted
136 * on other nodes, it may not be accurate */
137 if (public_ips
->ip
[j
].pnn
== i
) {
138 tmp_ip
->pnn
= public_ips
->ip
[j
].pnn
;
142 tmp_ip
->addr
= public_ips
->ip
[j
].addr
;
145 trbt_insertarray32_callback(ip_tree
,
146 IP_KEYLEN
, ip_key(&public_ips
->ip
[j
].addr
),
153 trbt_traversearray32(ip_tree
, IP_KEYLEN
, getips_count_callback
, &ip_list
);
154 talloc_free(ip_tree
);
159 static bool populate_bitmap(struct ipalloc_state
*ipalloc_state
)
161 struct public_ip_list
*ip
= NULL
;
164 for (ip
= ipalloc_state
->all_ips
; ip
!= NULL
; ip
= ip
->next
) {
166 ip
->known_on
= bitmap_talloc(ip
, ipalloc_state
->num
);
167 if (ip
->known_on
== NULL
) {
171 ip
->available_on
= bitmap_talloc(ip
, ipalloc_state
->num
);
172 if (ip
->available_on
== NULL
) {
176 for (i
= 0; i
< ipalloc_state
->num
; i
++) {
177 struct ctdb_public_ip_list
*known
=
178 &ipalloc_state
->known_public_ips
[i
];
179 struct ctdb_public_ip_list
*avail
=
180 &ipalloc_state
->available_public_ips
[i
];
182 /* Check to see if "ip" is available on node "i" */
183 for (j
= 0; j
< avail
->num
; j
++) {
184 if (ctdb_sock_addr_same_ip(
185 &ip
->addr
, &avail
->ip
[j
].addr
)) {
186 bitmap_set(ip
->available_on
, i
);
191 /* Optimisation: available => known */
192 if (bitmap_query(ip
->available_on
, i
)) {
193 bitmap_set(ip
->known_on
, i
);
197 /* Check to see if "ip" is known on node "i" */
198 for (j
= 0; j
< known
->num
; j
++) {
199 if (ctdb_sock_addr_same_ip(
200 &ip
->addr
, &known
->ip
[j
].addr
)) {
201 bitmap_set(ip
->known_on
, i
);
211 static bool all_nodes_are_disabled(struct ctdb_node_map
*nodemap
)
215 for (i
=0;i
<nodemap
->num
;i
++) {
216 if (!(nodemap
->node
[i
].flags
&
217 (NODE_FLAGS_INACTIVE
|NODE_FLAGS_DISABLED
))) {
218 /* Found one completely healthy node */
226 /* Set internal flags for IP allocation:
228 * Set NOIPHOST ip flag for each INACTIVE node
229 * if all nodes are disabled:
230 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
232 * Set NOIPHOST ip flags for disabled nodes
234 void ipalloc_set_node_flags(struct ipalloc_state
*ipalloc_state
,
235 struct ctdb_node_map
*nodemap
)
238 bool all_disabled
= all_nodes_are_disabled(nodemap
);
240 for (i
=0;i
<nodemap
->num
;i
++) {
241 /* Can not host IPs on INACTIVE node */
242 if (nodemap
->node
[i
].flags
& NODE_FLAGS_INACTIVE
) {
243 bitmap_set(ipalloc_state
->noiphost
, i
);
246 /* If node is disabled then it can only host IPs if
247 * all nodes are disabled and NoIPHostOnAllDisabled is
250 if (nodemap
->node
[i
].flags
& NODE_FLAGS_DISABLED
) {
251 if (!(all_disabled
&&
252 ipalloc_state
->no_ip_host_on_all_disabled
== 0)) {
254 bitmap_set(ipalloc_state
->noiphost
, i
);
260 void ipalloc_set_public_ips(struct ipalloc_state
*ipalloc_state
,
261 struct ctdb_public_ip_list
*known_ips
,
262 struct ctdb_public_ip_list
*available_ips
)
264 ipalloc_state
->available_public_ips
= available_ips
;
265 ipalloc_state
->known_public_ips
= known_ips
;
268 /* This can only return false if there are no available IPs *and*
269 * there are no IP addresses currently allocated. If the latter is
270 * true then the cluster can clearly host IPs... just not necessarily
272 bool ipalloc_can_host_ips(struct ipalloc_state
*ipalloc_state
)
275 bool have_ips
= false;
277 for (i
=0; i
< ipalloc_state
->num
; i
++) {
278 struct ctdb_public_ip_list
*ips
=
279 ipalloc_state
->known_public_ips
;
280 if (ips
[i
].num
!= 0) {
283 /* Succeed if an address is hosted on node i */
284 for (j
=0; j
< ips
[i
].num
; j
++) {
285 if (ips
[i
].ip
[j
].pnn
== i
) {
296 /* At this point there are known addresses but none are
297 * hosted. Need to check if cluster can now host some
300 for (i
=0; i
< ipalloc_state
->num
; i
++) {
301 if (ipalloc_state
->available_public_ips
[i
].num
!= 0) {
309 /* The calculation part of the IP allocation algorithm. */
310 struct public_ip_list
*ipalloc(struct ipalloc_state
*ipalloc_state
)
314 ipalloc_state
->all_ips
= create_merged_ip_list(ipalloc_state
);
315 if (ipalloc_state
->all_ips
== NULL
) {
319 if (!populate_bitmap(ipalloc_state
)) {
323 switch (ipalloc_state
->algorithm
) {
325 ret
= ipalloc_lcp2(ipalloc_state
);
327 case IPALLOC_DETERMINISTIC
:
328 ret
= ipalloc_deterministic(ipalloc_state
);
330 case IPALLOC_NONDETERMINISTIC
:
331 ret
= ipalloc_nondeterministic(ipalloc_state
);
335 /* at this point ->pnn is the node which will own each IP
336 or -1 if there is no node that can cover this ip
339 return (ret
? ipalloc_state
->all_ips
: NULL
);