tile: nohz: warn if nohz_full uses hypervisor shared cores
[linux-2.6/btrfs-unstable.git] / net / tipc / node.c
blob86152de8248da7164cde4b932ac7be1b6b6b2245
1 /*
2 * net/tipc/node.c: TIPC node management routines
4 * Copyright (c) 2000-2006, 2012-2014, Ericsson AB
5 * Copyright (c) 2005-2006, 2010-2014, Wind River Systems
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 #include "core.h"
38 #include "link.h"
39 #include "node.h"
40 #include "name_distr.h"
41 #include "socket.h"
43 static void node_lost_contact(struct tipc_node *n_ptr);
44 static void node_established_contact(struct tipc_node *n_ptr);
46 struct tipc_sock_conn {
47 u32 port;
48 u32 peer_port;
49 u32 peer_node;
50 struct list_head list;
53 static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
54 [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
55 [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
56 [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
60 * A trivial power-of-two bitmask technique is used for speed, since this
61 * operation is done for every incoming TIPC packet. The number of hash table
62 * entries has been chosen so that no hash chain exceeds 8 nodes and will
63 * usually be much smaller (typically only a single node).
65 static unsigned int tipc_hashfn(u32 addr)
67 return addr & (NODE_HTABLE_SIZE - 1);
71 * tipc_node_find - locate specified node object, if it exists
73 struct tipc_node *tipc_node_find(struct net *net, u32 addr)
75 struct tipc_net *tn = net_generic(net, tipc_net_id);
76 struct tipc_node *node;
78 if (unlikely(!in_own_cluster_exact(net, addr)))
79 return NULL;
81 rcu_read_lock();
82 hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
83 hash) {
84 if (node->addr == addr) {
85 rcu_read_unlock();
86 return node;
89 rcu_read_unlock();
90 return NULL;
93 struct tipc_node *tipc_node_create(struct net *net, u32 addr)
95 struct tipc_net *tn = net_generic(net, tipc_net_id);
96 struct tipc_node *n_ptr, *temp_node;
98 spin_lock_bh(&tn->node_list_lock);
99 n_ptr = tipc_node_find(net, addr);
100 if (n_ptr)
101 goto exit;
102 n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
103 if (!n_ptr) {
104 pr_warn("Node creation failed, no memory\n");
105 goto exit;
107 n_ptr->addr = addr;
108 n_ptr->net = net;
109 spin_lock_init(&n_ptr->lock);
110 INIT_HLIST_NODE(&n_ptr->hash);
111 INIT_LIST_HEAD(&n_ptr->list);
112 INIT_LIST_HEAD(&n_ptr->publ_list);
113 INIT_LIST_HEAD(&n_ptr->conn_sks);
114 __skb_queue_head_init(&n_ptr->bclink.deferred_queue);
115 hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
116 list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
117 if (n_ptr->addr < temp_node->addr)
118 break;
120 list_add_tail_rcu(&n_ptr->list, &temp_node->list);
121 n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
122 n_ptr->signature = INVALID_NODE_SIG;
123 exit:
124 spin_unlock_bh(&tn->node_list_lock);
125 return n_ptr;
128 static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr)
130 list_del_rcu(&n_ptr->list);
131 hlist_del_rcu(&n_ptr->hash);
132 kfree_rcu(n_ptr, rcu);
135 void tipc_node_stop(struct net *net)
137 struct tipc_net *tn = net_generic(net, tipc_net_id);
138 struct tipc_node *node, *t_node;
140 spin_lock_bh(&tn->node_list_lock);
141 list_for_each_entry_safe(node, t_node, &tn->node_list, list)
142 tipc_node_delete(tn, node);
143 spin_unlock_bh(&tn->node_list_lock);
146 int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
148 struct tipc_node *node;
149 struct tipc_sock_conn *conn;
151 if (in_own_node(net, dnode))
152 return 0;
154 node = tipc_node_find(net, dnode);
155 if (!node) {
156 pr_warn("Connecting sock to node 0x%x failed\n", dnode);
157 return -EHOSTUNREACH;
159 conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
160 if (!conn)
161 return -EHOSTUNREACH;
162 conn->peer_node = dnode;
163 conn->port = port;
164 conn->peer_port = peer_port;
166 tipc_node_lock(node);
167 list_add_tail(&conn->list, &node->conn_sks);
168 tipc_node_unlock(node);
169 return 0;
172 void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
174 struct tipc_node *node;
175 struct tipc_sock_conn *conn, *safe;
177 if (in_own_node(net, dnode))
178 return;
180 node = tipc_node_find(net, dnode);
181 if (!node)
182 return;
184 tipc_node_lock(node);
185 list_for_each_entry_safe(conn, safe, &node->conn_sks, list) {
186 if (port != conn->port)
187 continue;
188 list_del(&conn->list);
189 kfree(conn);
191 tipc_node_unlock(node);
195 * tipc_node_link_up - handle addition of link
197 * Link becomes active (alone or shared) or standby, depending on its priority.
199 void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
201 struct tipc_link **active = &n_ptr->active_links[0];
203 n_ptr->working_links++;
204 n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
205 n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
207 pr_debug("Established link <%s> on network plane %c\n",
208 l_ptr->name, l_ptr->net_plane);
210 if (!active[0]) {
211 active[0] = active[1] = l_ptr;
212 node_established_contact(n_ptr);
213 goto exit;
215 if (l_ptr->priority < active[0]->priority) {
216 pr_debug("New link <%s> becomes standby\n", l_ptr->name);
217 goto exit;
219 tipc_link_dup_queue_xmit(active[0], l_ptr);
220 if (l_ptr->priority == active[0]->priority) {
221 active[0] = l_ptr;
222 goto exit;
224 pr_debug("Old link <%s> becomes standby\n", active[0]->name);
225 if (active[1] != active[0])
226 pr_debug("Old link <%s> becomes standby\n", active[1]->name);
227 active[0] = active[1] = l_ptr;
228 exit:
229 /* Leave room for changeover header when returning 'mtu' to users: */
230 n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE;
231 n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE;
235 * node_select_active_links - select active link
237 static void node_select_active_links(struct tipc_node *n_ptr)
239 struct tipc_link **active = &n_ptr->active_links[0];
240 u32 i;
241 u32 highest_prio = 0;
243 active[0] = active[1] = NULL;
245 for (i = 0; i < MAX_BEARERS; i++) {
246 struct tipc_link *l_ptr = n_ptr->links[i];
248 if (!l_ptr || !tipc_link_is_up(l_ptr) ||
249 (l_ptr->priority < highest_prio))
250 continue;
252 if (l_ptr->priority > highest_prio) {
253 highest_prio = l_ptr->priority;
254 active[0] = active[1] = l_ptr;
255 } else {
256 active[1] = l_ptr;
262 * tipc_node_link_down - handle loss of link
264 void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
266 struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
267 struct tipc_link **active;
269 n_ptr->working_links--;
270 n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN;
271 n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
273 if (!tipc_link_is_active(l_ptr)) {
274 pr_debug("Lost standby link <%s> on network plane %c\n",
275 l_ptr->name, l_ptr->net_plane);
276 return;
278 pr_debug("Lost link <%s> on network plane %c\n",
279 l_ptr->name, l_ptr->net_plane);
281 active = &n_ptr->active_links[0];
282 if (active[0] == l_ptr)
283 active[0] = active[1];
284 if (active[1] == l_ptr)
285 active[1] = active[0];
286 if (active[0] == l_ptr)
287 node_select_active_links(n_ptr);
288 if (tipc_node_is_up(n_ptr))
289 tipc_link_failover_send_queue(l_ptr);
290 else
291 node_lost_contact(n_ptr);
293 /* Leave room for changeover header when returning 'mtu' to users: */
294 if (active[0]) {
295 n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE;
296 n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE;
297 return;
300 /* Loopback link went down? No fragmentation needed from now on. */
301 if (n_ptr->addr == tn->own_addr) {
302 n_ptr->act_mtus[0] = MAX_MSG_SIZE;
303 n_ptr->act_mtus[1] = MAX_MSG_SIZE;
307 int tipc_node_active_links(struct tipc_node *n_ptr)
309 return n_ptr->active_links[0] != NULL;
312 int tipc_node_is_up(struct tipc_node *n_ptr)
314 return tipc_node_active_links(n_ptr);
317 void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
319 n_ptr->links[l_ptr->bearer_id] = l_ptr;
320 n_ptr->link_cnt++;
323 void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
325 int i;
327 for (i = 0; i < MAX_BEARERS; i++) {
328 if (l_ptr != n_ptr->links[i])
329 continue;
330 n_ptr->links[i] = NULL;
331 n_ptr->link_cnt--;
335 static void node_established_contact(struct tipc_node *n_ptr)
337 n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
338 n_ptr->bclink.oos_state = 0;
339 n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
340 tipc_bclink_add_node(n_ptr->net, n_ptr->addr);
343 static void node_lost_contact(struct tipc_node *n_ptr)
345 char addr_string[16];
346 struct tipc_sock_conn *conn, *safe;
347 struct list_head *conns = &n_ptr->conn_sks;
348 struct sk_buff *skb;
349 struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
350 uint i;
352 pr_debug("Lost contact with %s\n",
353 tipc_addr_string_fill(addr_string, n_ptr->addr));
355 /* Flush broadcast link info associated with lost node */
356 if (n_ptr->bclink.recv_permitted) {
357 __skb_queue_purge(&n_ptr->bclink.deferred_queue);
359 if (n_ptr->bclink.reasm_buf) {
360 kfree_skb(n_ptr->bclink.reasm_buf);
361 n_ptr->bclink.reasm_buf = NULL;
364 tipc_bclink_remove_node(n_ptr->net, n_ptr->addr);
365 tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
367 n_ptr->bclink.recv_permitted = false;
370 /* Abort link changeover */
371 for (i = 0; i < MAX_BEARERS; i++) {
372 struct tipc_link *l_ptr = n_ptr->links[i];
373 if (!l_ptr)
374 continue;
375 l_ptr->reset_checkpoint = l_ptr->next_in_no;
376 l_ptr->exp_msg_count = 0;
377 tipc_link_reset_fragments(l_ptr);
379 /* Link marked for deletion after failover? => do it now */
380 if (l_ptr->flags & LINK_STOPPED)
381 tipc_link_delete(l_ptr);
384 n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
386 /* Prevent re-contact with node until cleanup is done */
387 n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN;
389 /* Notify publications from this node */
390 n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
392 /* Notify sockets connected to node */
393 list_for_each_entry_safe(conn, safe, conns, list) {
394 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
395 SHORT_H_SIZE, 0, tn->own_addr,
396 conn->peer_node, conn->port,
397 conn->peer_port, TIPC_ERR_NO_NODE);
398 if (likely(skb)) {
399 skb_queue_tail(n_ptr->inputq, skb);
400 n_ptr->action_flags |= TIPC_MSG_EVT;
402 list_del(&conn->list);
403 kfree(conn);
408 * tipc_node_get_linkname - get the name of a link
410 * @bearer_id: id of the bearer
411 * @node: peer node address
412 * @linkname: link name output buffer
414 * Returns 0 on success
416 int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
417 char *linkname, size_t len)
419 struct tipc_link *link;
420 struct tipc_node *node = tipc_node_find(net, addr);
422 if ((bearer_id >= MAX_BEARERS) || !node)
423 return -EINVAL;
424 tipc_node_lock(node);
425 link = node->links[bearer_id];
426 if (link) {
427 strncpy(linkname, link->name, len);
428 tipc_node_unlock(node);
429 return 0;
431 tipc_node_unlock(node);
432 return -EINVAL;
435 void tipc_node_unlock(struct tipc_node *node)
437 struct net *net = node->net;
438 u32 addr = 0;
439 u32 flags = node->action_flags;
440 u32 link_id = 0;
441 struct list_head *publ_list;
442 struct sk_buff_head *inputq = node->inputq;
443 struct sk_buff_head *namedq;
445 if (likely(!flags || (flags == TIPC_MSG_EVT))) {
446 node->action_flags = 0;
447 spin_unlock_bh(&node->lock);
448 if (flags == TIPC_MSG_EVT)
449 tipc_sk_rcv(net, inputq);
450 return;
453 addr = node->addr;
454 link_id = node->link_id;
455 namedq = node->namedq;
456 publ_list = &node->publ_list;
458 node->action_flags &= ~(TIPC_MSG_EVT |
459 TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
460 TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
461 TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
462 TIPC_NAMED_MSG_EVT);
464 spin_unlock_bh(&node->lock);
466 if (flags & TIPC_NOTIFY_NODE_DOWN)
467 tipc_publ_notify(net, publ_list, addr);
469 if (flags & TIPC_WAKEUP_BCAST_USERS)
470 tipc_bclink_wakeup_users(net);
472 if (flags & TIPC_NOTIFY_NODE_UP)
473 tipc_named_node_up(net, addr);
475 if (flags & TIPC_NOTIFY_LINK_UP)
476 tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
477 TIPC_NODE_SCOPE, link_id, addr);
479 if (flags & TIPC_NOTIFY_LINK_DOWN)
480 tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
481 link_id, addr);
483 if (flags & TIPC_MSG_EVT)
484 tipc_sk_rcv(net, inputq);
486 if (flags & TIPC_NAMED_MSG_EVT)
487 tipc_named_rcv(net, namedq);
489 if (flags & TIPC_BCAST_MSG_EVT)
490 tipc_bclink_input(net);
493 /* Caller should hold node lock for the passed node */
494 static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
496 void *hdr;
497 struct nlattr *attrs;
499 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
500 NLM_F_MULTI, TIPC_NL_NODE_GET);
501 if (!hdr)
502 return -EMSGSIZE;
504 attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE);
505 if (!attrs)
506 goto msg_full;
508 if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
509 goto attr_msg_full;
510 if (tipc_node_is_up(node))
511 if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
512 goto attr_msg_full;
514 nla_nest_end(msg->skb, attrs);
515 genlmsg_end(msg->skb, hdr);
517 return 0;
519 attr_msg_full:
520 nla_nest_cancel(msg->skb, attrs);
521 msg_full:
522 genlmsg_cancel(msg->skb, hdr);
524 return -EMSGSIZE;
527 int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
529 int err;
530 struct net *net = sock_net(skb->sk);
531 struct tipc_net *tn = net_generic(net, tipc_net_id);
532 int done = cb->args[0];
533 int last_addr = cb->args[1];
534 struct tipc_node *node;
535 struct tipc_nl_msg msg;
537 if (done)
538 return 0;
540 msg.skb = skb;
541 msg.portid = NETLINK_CB(cb->skb).portid;
542 msg.seq = cb->nlh->nlmsg_seq;
544 rcu_read_lock();
546 if (last_addr && !tipc_node_find(net, last_addr)) {
547 rcu_read_unlock();
548 /* We never set seq or call nl_dump_check_consistent() this
549 * means that setting prev_seq here will cause the consistence
550 * check to fail in the netlink callback handler. Resulting in
551 * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if
552 * the node state changed while we released the lock.
554 cb->prev_seq = 1;
555 return -EPIPE;
558 list_for_each_entry_rcu(node, &tn->node_list, list) {
559 if (last_addr) {
560 if (node->addr == last_addr)
561 last_addr = 0;
562 else
563 continue;
566 tipc_node_lock(node);
567 err = __tipc_nl_add_node(&msg, node);
568 if (err) {
569 last_addr = node->addr;
570 tipc_node_unlock(node);
571 goto out;
574 tipc_node_unlock(node);
576 done = 1;
577 out:
578 cb->args[0] = done;
579 cb->args[1] = last_addr;
580 rcu_read_unlock();
582 return skb->len;