tile: nohz: warn if nohz_full uses hypervisor shared cores
[linux-2.6/btrfs-unstable.git] / net / tipc / msg.c
blobb6eb90cd3ef7053ffe8d73143a53b8021e2770d3
1 /*
2 * net/tipc/msg.c: TIPC message header routines
4 * Copyright (c) 2000-2006, 2014, Ericsson AB
5 * Copyright (c) 2005, 2010-2011, Wind River Systems
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 #include <net/sock.h>
38 #include "core.h"
39 #include "msg.h"
40 #include "addr.h"
41 #include "name_table.h"
43 #define MAX_FORWARD_SIZE 1024
/* Round @i up to the nearest multiple of four (wraps like unsigned math). */
static unsigned int align(unsigned int i)
{
	const unsigned int mask = 3u;

	return (i + mask) & ~mask;
}
50 /**
51 * tipc_buf_acquire - creates a TIPC message buffer
52 * @size: message size (including TIPC header)
54 * Returns a new buffer with data pointers set to the specified size.
56 * NOTE: Headroom is reserved to allow prepending of a data link header.
57 * There may also be unrequested tailroom present at the buffer's end.
59 struct sk_buff *tipc_buf_acquire(u32 size)
61 struct sk_buff *skb;
62 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
64 skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
65 if (skb) {
66 skb_reserve(skb, BUF_HEADROOM);
67 skb_put(skb, size);
68 skb->next = NULL;
70 return skb;
73 void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type,
74 u32 hsize, u32 dnode)
76 memset(m, 0, hsize);
77 msg_set_version(m);
78 msg_set_user(m, user);
79 msg_set_hdr_sz(m, hsize);
80 msg_set_size(m, hsize);
81 msg_set_prevnode(m, own_node);
82 msg_set_type(m, type);
83 if (hsize > SHORT_H_SIZE) {
84 msg_set_orignode(m, own_node);
85 msg_set_destnode(m, dnode);
89 struct sk_buff *tipc_msg_create(uint user, uint type,
90 uint hdr_sz, uint data_sz, u32 dnode,
91 u32 onode, u32 dport, u32 oport, int errcode)
93 struct tipc_msg *msg;
94 struct sk_buff *buf;
96 buf = tipc_buf_acquire(hdr_sz + data_sz);
97 if (unlikely(!buf))
98 return NULL;
100 msg = buf_msg(buf);
101 tipc_msg_init(onode, msg, user, type, hdr_sz, dnode);
102 msg_set_size(msg, hdr_sz + data_sz);
103 msg_set_origport(msg, oport);
104 msg_set_destport(msg, dport);
105 msg_set_errcode(msg, errcode);
106 if (hdr_sz > SHORT_H_SIZE) {
107 msg_set_orignode(msg, onode);
108 msg_set_destnode(msg, dnode);
110 return buf;
113 /* tipc_buf_append(): Append a buffer to the fragment list of another buffer
114 * @*headbuf: in: NULL for first frag, otherwise value returned from prev call
115 * out: set when successful non-complete reassembly, otherwise NULL
116 * @*buf: in: the buffer to append. Always defined
117 * out: head buf after successful complete reassembly, otherwise NULL
118 * Returns 1 when reassembly complete, otherwise 0
120 int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
122 struct sk_buff *head = *headbuf;
123 struct sk_buff *frag = *buf;
124 struct sk_buff *tail;
125 struct tipc_msg *msg;
126 u32 fragid;
127 int delta;
128 bool headstolen;
130 if (!frag)
131 goto err;
133 msg = buf_msg(frag);
134 fragid = msg_type(msg);
135 frag->next = NULL;
136 skb_pull(frag, msg_hdr_sz(msg));
138 if (fragid == FIRST_FRAGMENT) {
139 if (unlikely(head))
140 goto err;
141 if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
142 goto err;
143 head = *headbuf = frag;
144 skb_frag_list_init(head);
145 TIPC_SKB_CB(head)->tail = NULL;
146 *buf = NULL;
147 return 0;
150 if (!head)
151 goto err;
153 if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
154 kfree_skb_partial(frag, headstolen);
155 } else {
156 tail = TIPC_SKB_CB(head)->tail;
157 if (!skb_has_frag_list(head))
158 skb_shinfo(head)->frag_list = frag;
159 else
160 tail->next = frag;
161 head->truesize += frag->truesize;
162 head->data_len += frag->len;
163 head->len += frag->len;
164 TIPC_SKB_CB(head)->tail = frag;
167 if (fragid == LAST_FRAGMENT) {
168 *buf = head;
169 TIPC_SKB_CB(head)->tail = NULL;
170 *headbuf = NULL;
171 return 1;
173 *buf = NULL;
174 return 0;
176 err:
177 pr_warn_ratelimited("Unable to build fragment list\n");
178 kfree_skb(*buf);
179 kfree_skb(*headbuf);
180 *buf = *headbuf = NULL;
181 return 0;
186 * tipc_msg_build - create buffer chain containing specified header and data
187 * @mhdr: Message header, to be prepended to data
188 * @m: User message
189 * @dsz: Total length of user data
190 * @pktmax: Max packet size that can be used
191 * @list: Buffer or chain of buffers to be returned to caller
193 * Returns message data size or errno: -ENOMEM, -EFAULT
195 int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
196 int offset, int dsz, int pktmax, struct sk_buff_head *list)
198 int mhsz = msg_hdr_sz(mhdr);
199 int msz = mhsz + dsz;
200 int pktno = 1;
201 int pktsz;
202 int pktrem = pktmax;
203 int drem = dsz;
204 struct tipc_msg pkthdr;
205 struct sk_buff *skb;
206 char *pktpos;
207 int rc;
209 msg_set_size(mhdr, msz);
211 /* No fragmentation needed? */
212 if (likely(msz <= pktmax)) {
213 skb = tipc_buf_acquire(msz);
214 if (unlikely(!skb))
215 return -ENOMEM;
216 skb_orphan(skb);
217 __skb_queue_tail(list, skb);
218 skb_copy_to_linear_data(skb, mhdr, mhsz);
219 pktpos = skb->data + mhsz;
220 if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz)
221 return dsz;
222 rc = -EFAULT;
223 goto error;
226 /* Prepare reusable fragment header */
227 tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER,
228 FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr));
229 msg_set_size(&pkthdr, pktmax);
230 msg_set_fragm_no(&pkthdr, pktno);
232 /* Prepare first fragment */
233 skb = tipc_buf_acquire(pktmax);
234 if (!skb)
235 return -ENOMEM;
236 skb_orphan(skb);
237 __skb_queue_tail(list, skb);
238 pktpos = skb->data;
239 skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
240 pktpos += INT_H_SIZE;
241 pktrem -= INT_H_SIZE;
242 skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz);
243 pktpos += mhsz;
244 pktrem -= mhsz;
246 do {
247 if (drem < pktrem)
248 pktrem = drem;
250 if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) {
251 rc = -EFAULT;
252 goto error;
254 drem -= pktrem;
256 if (!drem)
257 break;
259 /* Prepare new fragment: */
260 if (drem < (pktmax - INT_H_SIZE))
261 pktsz = drem + INT_H_SIZE;
262 else
263 pktsz = pktmax;
264 skb = tipc_buf_acquire(pktsz);
265 if (!skb) {
266 rc = -ENOMEM;
267 goto error;
269 skb_orphan(skb);
270 __skb_queue_tail(list, skb);
271 msg_set_type(&pkthdr, FRAGMENT);
272 msg_set_size(&pkthdr, pktsz);
273 msg_set_fragm_no(&pkthdr, ++pktno);
274 skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
275 pktpos = skb->data + INT_H_SIZE;
276 pktrem = pktsz - INT_H_SIZE;
278 } while (1);
279 msg_set_type(buf_msg(skb), LAST_FRAGMENT);
280 return dsz;
281 error:
282 __skb_queue_purge(list);
283 __skb_queue_head_init(list);
284 return rc;
288 * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
289 * @list: the buffer chain of the existing buffer ("bundle")
290 * @skb: buffer to be appended
291 * @mtu: max allowable size for the bundle buffer
292 * Consumes buffer if successful
293 * Returns true if bundling could be performed, otherwise false
295 bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu)
297 struct sk_buff *bskb = skb_peek_tail(list);
298 struct tipc_msg *bmsg = buf_msg(bskb);
299 struct tipc_msg *msg = buf_msg(skb);
300 unsigned int bsz = msg_size(bmsg);
301 unsigned int msz = msg_size(msg);
302 u32 start = align(bsz);
303 u32 max = mtu - INT_H_SIZE;
304 u32 pad = start - bsz;
306 if (likely(msg_user(msg) == MSG_FRAGMENTER))
307 return false;
308 if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL))
309 return false;
310 if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
311 return false;
312 if (likely(msg_user(bmsg) != MSG_BUNDLER))
313 return false;
314 if (likely(!TIPC_SKB_CB(bskb)->bundling))
315 return false;
316 if (unlikely(skb_tailroom(bskb) < (pad + msz)))
317 return false;
318 if (unlikely(max < (start + msz)))
319 return false;
321 skb_put(bskb, pad + msz);
322 skb_copy_to_linear_data_offset(bskb, start, skb->data, msz);
323 msg_set_size(bmsg, start + msz);
324 msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
325 kfree_skb(skb);
326 return true;
330 * tipc_msg_extract(): extract bundled inner packet from buffer
331 * @skb: linear outer buffer, to be extracted from.
332 * @iskb: extracted inner buffer, to be returned
333 * @pos: position of msg to be extracted. Returns with pointer of next msg
334 * Consumes outer buffer when last packet extracted
335 * Returns true when when there is an extracted buffer, otherwise false
337 bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
339 struct tipc_msg *msg = buf_msg(skb);
340 int imsz;
341 struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos);
343 /* Is there space left for shortest possible message? */
344 if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE))
345 goto none;
346 imsz = msg_size(imsg);
348 /* Is there space left for current message ? */
349 if ((*pos + imsz) > msg_data_sz(msg))
350 goto none;
351 *iskb = tipc_buf_acquire(imsz);
352 if (!*iskb)
353 goto none;
354 skb_copy_to_linear_data(*iskb, imsg, imsz);
355 *pos += align(imsz);
356 return true;
357 none:
358 kfree_skb(skb);
359 *iskb = NULL;
360 return false;
364 * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
365 * @list: the buffer chain
366 * @skb: buffer to be appended and replaced
367 * @mtu: max allowable size for the bundle buffer, inclusive header
368 * @dnode: destination node for message. (Not always present in header)
369 * Replaces buffer if successful
370 * Returns true if success, otherwise false
372 bool tipc_msg_make_bundle(struct sk_buff_head *list,
373 struct sk_buff *skb, u32 mtu, u32 dnode)
375 struct sk_buff *bskb;
376 struct tipc_msg *bmsg;
377 struct tipc_msg *msg = buf_msg(skb);
378 u32 msz = msg_size(msg);
379 u32 max = mtu - INT_H_SIZE;
381 if (msg_user(msg) == MSG_FRAGMENTER)
382 return false;
383 if (msg_user(msg) == CHANGEOVER_PROTOCOL)
384 return false;
385 if (msg_user(msg) == BCAST_PROTOCOL)
386 return false;
387 if (msz > (max / 2))
388 return false;
390 bskb = tipc_buf_acquire(max);
391 if (!bskb)
392 return false;
394 skb_trim(bskb, INT_H_SIZE);
395 bmsg = buf_msg(bskb);
396 tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
397 INT_H_SIZE, dnode);
398 msg_set_seqno(bmsg, msg_seqno(msg));
399 msg_set_ack(bmsg, msg_ack(msg));
400 msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
401 TIPC_SKB_CB(bskb)->bundling = true;
402 __skb_queue_tail(list, bskb);
403 return tipc_msg_bundle(list, skb, mtu);
407 * tipc_msg_reverse(): swap source and destination addresses and add error code
408 * @buf: buffer containing message to be reversed
409 * @dnode: return value: node where to send message after reversal
410 * @err: error code to be set in message
411 * Consumes buffer if failure
412 * Returns true if success, otherwise false
414 bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
415 int err)
417 struct tipc_msg *msg = buf_msg(buf);
418 uint imp = msg_importance(msg);
419 struct tipc_msg ohdr;
420 uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE);
422 if (skb_linearize(buf))
423 goto exit;
424 if (msg_dest_droppable(msg))
425 goto exit;
426 if (msg_errcode(msg))
427 goto exit;
429 memcpy(&ohdr, msg, msg_hdr_sz(msg));
430 imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE);
431 if (msg_isdata(msg))
432 msg_set_importance(msg, imp);
433 msg_set_errcode(msg, err);
434 msg_set_origport(msg, msg_destport(&ohdr));
435 msg_set_destport(msg, msg_origport(&ohdr));
436 msg_set_prevnode(msg, own_addr);
437 if (!msg_short(msg)) {
438 msg_set_orignode(msg, msg_destnode(&ohdr));
439 msg_set_destnode(msg, msg_orignode(&ohdr));
441 msg_set_size(msg, msg_hdr_sz(msg) + rdsz);
442 skb_trim(buf, msg_size(msg));
443 skb_orphan(buf);
444 *dnode = msg_orignode(&ohdr);
445 return true;
446 exit:
447 kfree_skb(buf);
448 *dnode = 0;
449 return false;
453 * tipc_msg_lookup_dest(): try to find new destination for named message
454 * @skb: the buffer containing the message.
455 * @dnode: return value: next-hop node, if destination found
456 * @err: return value: error code to use, if message to be rejected
457 * Does not consume buffer
458 * Returns true if a destination is found, false otherwise
460 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb,
461 u32 *dnode, int *err)
463 struct tipc_msg *msg = buf_msg(skb);
464 u32 dport;
466 if (!msg_isdata(msg))
467 return false;
468 if (!msg_named(msg))
469 return false;
470 *err = -TIPC_ERR_NO_NAME;
471 if (skb_linearize(skb))
472 return false;
473 if (msg_reroute_cnt(msg) > 0)
474 return false;
475 *dnode = addr_domain(net, msg_lookup_scope(msg));
476 dport = tipc_nametbl_translate(net, msg_nametype(msg),
477 msg_nameinst(msg), dnode);
478 if (!dport)
479 return false;
480 msg_incr_reroute_cnt(msg);
481 msg_set_destnode(msg, *dnode);
482 msg_set_destport(msg, dport);
483 *err = TIPC_OK;
484 return true;
487 /* tipc_msg_reassemble() - clone a buffer chain of fragments and
488 * reassemble the clones into one message
490 struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list)
492 struct sk_buff *skb;
493 struct sk_buff *frag = NULL;
494 struct sk_buff *head = NULL;
495 int hdr_sz;
497 /* Copy header if single buffer */
498 if (skb_queue_len(list) == 1) {
499 skb = skb_peek(list);
500 hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
501 return __pskb_copy(skb, hdr_sz, GFP_ATOMIC);
504 /* Clone all fragments and reassemble */
505 skb_queue_walk(list, skb) {
506 frag = skb_clone(skb, GFP_ATOMIC);
507 if (!frag)
508 goto error;
509 frag->next = NULL;
510 if (tipc_buf_append(&head, &frag))
511 break;
512 if (!head)
513 goto error;
515 return frag;
516 error:
517 pr_warn("Failed do clone local mcast rcv buffer\n");
518 kfree_skb(head);
519 return NULL;