mm: further fix swapin race condition
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / rxrpc / ar-output.c
blob5f22e263eda748ffdbe06f802626588a70776004
1 /* RxRPC packet transmission
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 #include <linux/net.h>
13 #include <linux/gfp.h>
14 #include <linux/skbuff.h>
15 #include <linux/circ_buf.h>
16 #include <net/sock.h>
17 #include <net/af_rxrpc.h>
18 #include "ar-internal.h"
/* Delay before a transmitted packet becomes due for resend.  It is multiplied
 * by HZ when resend_at is computed in rxrpc_queue_packet(), so the unit is
 * seconds. */
20 int rxrpc_resend_timeout = 4;
/* Forward declaration: rxrpc_send_data() is defined later in this file but is
 * called by the sendmsg entry points that precede it. */
22 static int rxrpc_send_data(struct kiocb *iocb,
23 struct rxrpc_sock *rx,
24 struct rxrpc_call *call,
25 struct msghdr *msg, size_t len);
28 * extract control messages from the sendmsg() control buffer
30 static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
31 unsigned long *user_call_ID,
32 enum rxrpc_command *command,
33 u32 *abort_code,
34 bool server)
36 struct cmsghdr *cmsg;
37 int len;
39 *command = RXRPC_CMD_SEND_DATA;
41 if (msg->msg_controllen == 0)
42 return -EINVAL;
44 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
45 if (!CMSG_OK(msg, cmsg))
46 return -EINVAL;
48 len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
49 _debug("CMSG %d, %d, %d",
50 cmsg->cmsg_level, cmsg->cmsg_type, len);
52 if (cmsg->cmsg_level != SOL_RXRPC)
53 continue;
55 switch (cmsg->cmsg_type) {
56 case RXRPC_USER_CALL_ID:
57 if (msg->msg_flags & MSG_CMSG_COMPAT) {
58 if (len != sizeof(u32))
59 return -EINVAL;
60 *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
61 } else {
62 if (len != sizeof(unsigned long))
63 return -EINVAL;
64 *user_call_ID = *(unsigned long *)
65 CMSG_DATA(cmsg);
67 _debug("User Call ID %lx", *user_call_ID);
68 break;
70 case RXRPC_ABORT:
71 if (*command != RXRPC_CMD_SEND_DATA)
72 return -EINVAL;
73 *command = RXRPC_CMD_SEND_ABORT;
74 if (len != sizeof(*abort_code))
75 return -EINVAL;
76 *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
77 _debug("Abort %x", *abort_code);
78 if (*abort_code == 0)
79 return -EINVAL;
80 break;
82 case RXRPC_ACCEPT:
83 if (*command != RXRPC_CMD_SEND_DATA)
84 return -EINVAL;
85 *command = RXRPC_CMD_ACCEPT;
86 if (len != 0)
87 return -EINVAL;
88 if (!server)
89 return -EISCONN;
90 break;
92 default:
93 return -EINVAL;
97 _leave(" = 0");
98 return 0;
102 * abort a call, sending an ABORT packet to the peer
104 static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
106 write_lock_bh(&call->state_lock);
108 if (call->state <= RXRPC_CALL_COMPLETE) {
109 call->state = RXRPC_CALL_LOCALLY_ABORTED;
110 call->abort_code = abort_code;
111 set_bit(RXRPC_CALL_ABORT, &call->events);
112 del_timer_sync(&call->resend_timer);
113 del_timer_sync(&call->ack_timer);
114 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
115 clear_bit(RXRPC_CALL_ACK, &call->events);
116 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
117 rxrpc_queue_call(call);
120 write_unlock_bh(&call->state_lock);
124 * send a message forming part of a client call through an RxRPC socket
125 * - caller holds the socket locked
126 * - the socket may be either a client socket or a server socket
128 int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
129 struct rxrpc_transport *trans, struct msghdr *msg,
130 size_t len)
132 struct rxrpc_conn_bundle *bundle;
133 enum rxrpc_command cmd;
134 struct rxrpc_call *call;
135 unsigned long user_call_ID = 0;
136 struct key *key;
137 __be16 service_id;
138 u32 abort_code = 0;
139 int ret;
141 _enter("");
143 ASSERT(trans != NULL);
145 ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
146 false);
147 if (ret < 0)
148 return ret;
150 bundle = NULL;
151 if (trans) {
152 service_id = rx->service_id;
153 if (msg->msg_name) {
154 struct sockaddr_rxrpc *srx =
155 (struct sockaddr_rxrpc *) msg->msg_name;
156 service_id = htons(srx->srx_service);
158 key = rx->key;
159 if (key && !rx->key->payload.data)
160 key = NULL;
161 bundle = rxrpc_get_bundle(rx, trans, key, service_id,
162 GFP_KERNEL);
163 if (IS_ERR(bundle))
164 return PTR_ERR(bundle);
167 call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
168 abort_code == 0, GFP_KERNEL);
169 if (trans)
170 rxrpc_put_bundle(trans, bundle);
171 if (IS_ERR(call)) {
172 _leave(" = %ld", PTR_ERR(call));
173 return PTR_ERR(call);
176 _debug("CALL %d USR %lx ST %d on CONN %p",
177 call->debug_id, call->user_call_ID, call->state, call->conn);
179 if (call->state >= RXRPC_CALL_COMPLETE) {
180 /* it's too late for this call */
181 ret = -ESHUTDOWN;
182 } else if (cmd == RXRPC_CMD_SEND_ABORT) {
183 rxrpc_send_abort(call, abort_code);
184 } else if (cmd != RXRPC_CMD_SEND_DATA) {
185 ret = -EINVAL;
186 } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
187 /* request phase complete for this client call */
188 ret = -EPROTO;
189 } else {
190 ret = rxrpc_send_data(iocb, rx, call, msg, len);
193 rxrpc_put_call(call);
194 _leave(" = %d", ret);
195 return ret;
199 * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
200 * @call: The call to send data through
201 * @msg: The data to send
202 * @len: The amount of data to send
204 * Allow a kernel service to send data on a call. The call must be in an state
205 * appropriate to sending data. No control data should be supplied in @msg,
206 * nor should an address be supplied. MSG_MORE should be flagged if there's
207 * more data to come, otherwise this data will end the transmission phase.
209 int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
210 size_t len)
212 int ret;
214 _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
216 ASSERTCMP(msg->msg_name, ==, NULL);
217 ASSERTCMP(msg->msg_control, ==, NULL);
219 lock_sock(&call->socket->sk);
221 _debug("CALL %d USR %lx ST %d on CONN %p",
222 call->debug_id, call->user_call_ID, call->state, call->conn);
224 if (call->state >= RXRPC_CALL_COMPLETE) {
225 ret = -ESHUTDOWN; /* it's too late for this call */
226 } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
227 call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
228 call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
229 ret = -EPROTO; /* request phase complete for this client call */
230 } else {
231 mm_segment_t oldfs = get_fs();
232 set_fs(KERNEL_DS);
233 ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
234 set_fs(oldfs);
237 release_sock(&call->socket->sk);
238 _leave(" = %d", ret);
239 return ret;
242 EXPORT_SYMBOL(rxrpc_kernel_send_data);
245 * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
246 * @call: The call to be aborted
247 * @abort_code: The abort code to stick into the ABORT packet
249 * Allow a kernel service to abort a call, if it's still in an abortable state.
251 void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
253 _enter("{%d},%d", call->debug_id, abort_code);
255 lock_sock(&call->socket->sk);
257 _debug("CALL %d USR %lx ST %d on CONN %p",
258 call->debug_id, call->user_call_ID, call->state, call->conn);
260 if (call->state < RXRPC_CALL_COMPLETE)
261 rxrpc_send_abort(call, abort_code);
263 release_sock(&call->socket->sk);
264 _leave("");
267 EXPORT_SYMBOL(rxrpc_kernel_abort_call);
270 * send a message through a server socket
271 * - caller holds the socket locked
273 int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
274 struct msghdr *msg, size_t len)
276 enum rxrpc_command cmd;
277 struct rxrpc_call *call;
278 unsigned long user_call_ID = 0;
279 u32 abort_code = 0;
280 int ret;
282 _enter("");
284 ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
285 true);
286 if (ret < 0)
287 return ret;
289 if (cmd == RXRPC_CMD_ACCEPT) {
290 call = rxrpc_accept_call(rx, user_call_ID);
291 if (IS_ERR(call))
292 return PTR_ERR(call);
293 rxrpc_put_call(call);
294 return 0;
297 call = rxrpc_find_server_call(rx, user_call_ID);
298 if (!call)
299 return -EBADSLT;
300 if (call->state >= RXRPC_CALL_COMPLETE) {
301 ret = -ESHUTDOWN;
302 goto out;
305 switch (cmd) {
306 case RXRPC_CMD_SEND_DATA:
307 if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
308 call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
309 call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
310 /* Tx phase not yet begun for this call */
311 ret = -EPROTO;
312 break;
315 ret = rxrpc_send_data(iocb, rx, call, msg, len);
316 break;
318 case RXRPC_CMD_SEND_ABORT:
319 rxrpc_send_abort(call, abort_code);
320 break;
321 default:
322 BUG();
325 out:
326 rxrpc_put_call(call);
327 _leave(" = %d", ret);
328 return ret;
332 * send a packet through the transport endpoint
334 int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
336 struct kvec iov[1];
337 struct msghdr msg;
338 int ret, opt;
340 _enter(",{%d}", skb->len);
342 iov[0].iov_base = skb->head;
343 iov[0].iov_len = skb->len;
345 msg.msg_name = &trans->peer->srx.transport.sin;
346 msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
347 msg.msg_control = NULL;
348 msg.msg_controllen = 0;
349 msg.msg_flags = 0;
351 /* send the packet with the don't fragment bit set if we currently
352 * think it's small enough */
353 if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
354 down_read(&trans->local->defrag_sem);
355 /* send the packet by UDP
356 * - returns -EMSGSIZE if UDP would have to fragment the packet
357 * to go out of the interface
358 * - in which case, we'll have processed the ICMP error
359 * message and update the peer record
361 ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
362 iov[0].iov_len);
364 up_read(&trans->local->defrag_sem);
365 if (ret == -EMSGSIZE)
366 goto send_fragmentable;
368 _leave(" = %d [%u]", ret, trans->peer->maxdata);
369 return ret;
372 send_fragmentable:
373 /* attempt to send this message with fragmentation enabled */
374 _debug("send fragment");
376 down_write(&trans->local->defrag_sem);
377 opt = IP_PMTUDISC_DONT;
378 ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
379 (char *) &opt, sizeof(opt));
380 if (ret == 0) {
381 ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
382 iov[0].iov_len);
384 opt = IP_PMTUDISC_DO;
385 kernel_setsockopt(trans->local->socket, SOL_IP,
386 IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
389 up_write(&trans->local->defrag_sem);
390 _leave(" = %d [frag %u]", ret, trans->peer->maxdata);
391 return ret;
395 * wait for space to appear in the transmit/ACK window
396 * - caller holds the socket locked
398 static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
399 struct rxrpc_call *call,
400 long *timeo)
402 DECLARE_WAITQUEUE(myself, current);
403 int ret;
405 _enter(",{%d},%ld",
406 CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
407 *timeo);
409 add_wait_queue(&call->tx_waitq, &myself);
411 for (;;) {
412 set_current_state(TASK_INTERRUPTIBLE);
413 ret = 0;
414 if (CIRC_SPACE(call->acks_head, call->acks_tail,
415 call->acks_winsz) > 0)
416 break;
417 if (signal_pending(current)) {
418 ret = sock_intr_errno(*timeo);
419 break;
422 release_sock(&rx->sk);
423 *timeo = schedule_timeout(*timeo);
424 lock_sock(&rx->sk);
427 remove_wait_queue(&call->tx_waitq, &myself);
428 set_current_state(TASK_RUNNING);
429 _leave(" = %d", ret);
430 return ret;
434 * attempt to schedule an instant Tx resend
436 static inline void rxrpc_instant_resend(struct rxrpc_call *call)
438 read_lock_bh(&call->state_lock);
439 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
440 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
441 if (call->state < RXRPC_CALL_COMPLETE &&
442 !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
443 rxrpc_queue_call(call);
445 read_unlock_bh(&call->state_lock);
449 * queue a packet for transmission, set the resend timer and attempt
450 * to send the packet immediately
/*
 * Queue @skb in the call's Tx ring (acks_window), advance the call state if
 * this packet ends or begins a transmission phase, arm the resend timer if it
 * is not already running, and try to transmit the packet straight away.  If
 * the immediate transmission is skipped or fails, the packet is flagged as
 * needing resend and rxrpc_instant_resend() is invoked.
 *
 * @last is true when this packet carries the final data of the phase.
 */
452 static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
453 bool last)
455 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
456 int ret;
458 _net("queue skb %p [%d]", skb, call->acks_head);
460 ASSERT(call->acks_window != NULL);
/* fill the slot first, then publish it by advancing the head index; the
 * write barrier keeps the two stores in that order */
461 call->acks_window[call->acks_head] = (unsigned long) skb;
462 smp_wmb();
/* wrapping by mask presumes acks_winsz is a power of two */
463 call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
/* state is tested here without the lock and re-examined under it below */
465 if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
466 _debug("________awaiting reply/ACK__________");
467 write_lock_bh(&call->state_lock);
468 switch (call->state) {
469 case RXRPC_CALL_CLIENT_SEND_REQUEST:
470 call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
471 break;
472 case RXRPC_CALL_SERVER_ACK_REQUEST:
473 call->state = RXRPC_CALL_SERVER_SEND_REPLY;
474 if (!last)
475 break;
/* fall through: the first reply packet is also the last one, so go straight
 * on to awaiting the final ACK */
476 case RXRPC_CALL_SERVER_SEND_REPLY:
477 call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
478 break;
479 default:
480 break;
482 write_unlock_bh(&call->state_lock);
485 _proto("Tx DATA %%%u { #%u }",
486 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
/* note when this packet becomes due for resend and start the resend timer
 * unless the RUN_RTIMER flag shows it is already running */
488 sp->need_resend = 0;
489 sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
490 if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
491 _debug("run timer");
492 call->resend_timer.expires = sp->resend_at;
493 add_timer(&call->resend_timer);
496 /* attempt to cancel the rx-ACK timer, deferring reply transmission if
497 * we're ACK'ing the request phase of an incoming call */
/* -EAGAIN stands in for "not sent" when the ACK timer cannot be cancelled */
498 ret = -EAGAIN;
499 if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
500 /* the packet may be freed by rxrpc_process_call() before this
501 * returns */
502 ret = rxrpc_send_packet(call->conn->trans, skb);
503 _net("sent skb %p", skb);
504 } else {
505 _debug("failed to delete ACK timer");
/* NOTE(review): sp points into skb's private area and is written below after
 * the send attempt; confirm the skb cannot have been freed on this path */
508 if (ret < 0) {
509 _debug("need instant resend %d", ret);
510 sp->need_resend = 1;
511 rxrpc_instant_resend(call);
514 _leave("");
518 * send data through a socket
519 * - must be called in process context
520 * - caller holds the socket locked
/*
 * Copy up to @len bytes from the iovec array in @msg into Tx packets for
 * @call and queue each packet as it is completed.
 *
 * A partly-filled packet left in call->tx_pending by an earlier invocation is
 * continued first, and any packet still incomplete on return is stored back
 * there.  A packet is completed when its data allowance is used up or when
 * the data runs out and MSG_MORE was not given.
 *
 * Returns the number of bytes copied if any were, otherwise a negative
 * error: -EPIPE if the socket has an error or is shut down for sending,
 * -EAGAIN if MSG_DONTWAIT is set and the Tx window is full, -EFAULT if
 * copying the data in fails, -ECONNABORTED or the peer's net_error if the
 * call is found to have gone past RXRPC_CALL_COMPLETE, or an error from
 * waiting for window space, allocating a buffer or securing the packet.
 */
522 static int rxrpc_send_data(struct kiocb *iocb,
523 struct rxrpc_sock *rx,
524 struct rxrpc_call *call,
525 struct msghdr *msg, size_t len)
527 struct rxrpc_skb_priv *sp;
528 unsigned char __user *from;
529 struct sk_buff *skb;
530 struct iovec *iov;
531 struct sock *sk = &rx->sk;
532 long timeo;
533 bool more;
534 int ret, ioc, segment, copied;
536 _enter(",,,{%zu},%zu", msg->msg_iovlen, len);
538 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
540 /* this should be in poll */
541 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
543 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
544 return -EPIPE;
/* load the first iovec entry; ioc counts the entries still to be visited
 * after it */
546 iov = msg->msg_iov;
547 ioc = msg->msg_iovlen - 1;
548 from = iov->iov_base;
549 segment = iov->iov_len;
550 iov++;
551 more = msg->msg_flags & MSG_MORE;
/* take over any packet left partly filled by a previous invocation */
553 skb = call->tx_pending;
554 call->tx_pending = NULL;
556 copied = 0;
557 do {
558 int copy;
/* never take more from this segment than the caller asked to send */
560 if (segment > len)
561 segment = len;
563 _debug("SEGMENT %d @%p", segment, from);
/* no packet in progress: make sure there is room in the Tx window, then
 * allocate a fresh buffer */
565 if (!skb) {
566 size_t size, chunk, max, space;
568 _debug("alloc");
570 if (CIRC_SPACE(call->acks_head, call->acks_tail,
571 call->acks_winsz) <= 0) {
572 ret = -EAGAIN;
573 if (msg->msg_flags & MSG_DONTWAIT)
574 goto maybe_error;
575 ret = rxrpc_wait_for_tx_window(rx, call,
576 &timeo);
577 if (ret < 0)
578 goto maybe_error;
/* largest data payload for one packet: the peer's maxdata less the
 * connection's security_size, rounded down to a multiple of size_align */
581 max = call->conn->trans->peer->maxdata;
582 max -= call->conn->security_size;
583 max &= ~(call->conn->size_align - 1UL);
/* when no more data is promised, do not ask for more than is left to send */
585 chunk = max;
586 if (chunk > len && !more)
587 chunk = len;
/* leave room for alignment padding, then add the header */
589 space = chunk + call->conn->size_align;
590 space &= ~(call->conn->size_align - 1UL);
592 size = space + call->conn->header_size;
594 _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
596 /* create a buffer that we can retain until it's ACK'd */
597 skb = sock_alloc_send_skb(
598 sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
599 if (!skb)
600 goto maybe_error;
602 rxrpc_new_skb(skb);
604 _debug("ALLOC SEND %p", skb);
/* skb->mark is used below as a running count of the data bytes copied into
 * this packet, so it must start at zero */
606 ASSERTCMP(skb->mark, ==, 0);
608 _debug("HS: %u", call->conn->header_size);
/* set aside room for the header and count it in skb->len, since
 * rxrpc_send_packet() transmits skb->len bytes starting at skb->head */
609 skb_reserve(skb, call->conn->header_size);
610 skb->len += call->conn->header_size;
/* sp->remain tracks how much data this packet may still take */
612 sp = rxrpc_skb(skb);
613 sp->remain = chunk;
614 if (sp->remain > skb_tailroom(skb))
615 sp->remain = skb_tailroom(skb);
617 _net("skb: hr %d, tr %d, hl %d, rm %d",
618 skb_headroom(skb),
619 skb_tailroom(skb),
620 skb_headlen(skb),
621 sp->remain);
623 skb->ip_summed = CHECKSUM_UNNECESSARY;
626 _debug("append");
627 sp = rxrpc_skb(skb);
629 /* append next segment of data to the current buffer */
/* the amount copied is bounded by the buffer's tailroom, by the current
 * iovec segment and by the packet's remaining allowance */
630 copy = skb_tailroom(skb);
631 ASSERTCMP(copy, >, 0);
632 if (copy > segment)
633 copy = segment;
634 if (copy > sp->remain)
635 copy = sp->remain;
637 _debug("add");
638 ret = skb_add_data(skb, from, copy);
639 _debug("added");
640 if (ret < 0)
641 goto efault;
642 sp->remain -= copy;
643 skb->mark += copy;
644 copied += copy;
646 len -= copy;
647 segment -= copy;
648 from += copy;
/* current segment used up: advance to the next iovec entry, skipping any
 * that are empty */
649 while (segment == 0 && ioc > 0) {
650 from = iov->iov_base;
651 segment = iov->iov_len;
652 iov++;
653 ioc--;
/* everything requested has been consumed: ignore any remaining iovec space
 * so that the loop terminates */
655 if (len == 0) {
656 segment = 0;
657 ioc = 0;
660 /* check for the far side aborting the call or a network error
661 * occurring */
662 if (call->state > RXRPC_CALL_COMPLETE)
663 goto call_aborted;
665 /* add the packet to the send queue if it's now full */
/* also done when the data has run out and MSG_MORE was not set */
666 if (sp->remain <= 0 || (segment == 0 && !more)) {
667 struct rxrpc_connection *conn = call->conn;
668 size_t pad;
670 /* pad out if we're using security */
/* pad is the number of zero bytes needed to bring security_size plus the
 * data length up to a multiple of size_align */
671 if (conn->security) {
672 pad = conn->security_size + skb->mark;
673 pad = conn->size_align - pad;
674 pad &= conn->size_align - 1;
675 _debug("pad %zu", pad);
676 if (pad)
677 memset(skb_put(skb, pad), 0, pad);
/* build the wire header in the skb's private area, drawing a fresh call
 * sequence number and connection serial number for this packet */
680 sp->hdr.epoch = conn->epoch;
681 sp->hdr.cid = call->cid;
682 sp->hdr.callNumber = call->call_id;
683 sp->hdr.seq =
684 htonl(atomic_inc_return(&call->sequence));
685 sp->hdr.serial =
686 htonl(atomic_inc_return(&conn->serial));
687 sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
688 sp->hdr.userStatus = 0;
689 sp->hdr.securityIndex = conn->security_ix;
690 sp->hdr._rsvd = 0;
691 sp->hdr.serviceId = conn->service_id;
/* flag the final packet of the phase; otherwise advertise MORE_PACKETS only
 * while the Tx window has more than one free slot */
693 sp->hdr.flags = conn->out_clientflag;
694 if (len == 0 && !more)
695 sp->hdr.flags |= RXRPC_LAST_PACKET;
696 else if (CIRC_SPACE(call->acks_head, call->acks_tail,
697 call->acks_winsz) > 1)
698 sp->hdr.flags |= RXRPC_MORE_PACKETS;
/* hand the packet, its data length and the position just after the header
 * to the security layer; what is done there is not visible from this file */
700 ret = rxrpc_secure_packet(
701 call, skb, skb->mark,
702 skb->head + sizeof(struct rxrpc_header));
703 if (ret < 0)
704 goto out;
/* place the finished header at the very start of the buffer and queue the
 * packet; the queue takes it over, so forget it here */
706 memcpy(skb->head, &sp->hdr,
707 sizeof(struct rxrpc_header));
708 rxrpc_queue_packet(call, skb, segment == 0 && !more);
709 skb = NULL;
712 } while (segment > 0);
/* normal exit: report the amount copied and keep any unfinished packet for
 * the next invocation */
714 success:
715 ret = copied;
716 out:
717 call->tx_pending = skb;
718 _leave(" = %d", ret);
719 return ret;
/* the call went past RXRPC_CALL_COMPLETE: discard the packet in progress and
 * report why */
721 call_aborted:
722 rxrpc_free_skb(skb);
723 if (call->state == RXRPC_CALL_NETWORK_ERROR)
724 ret = call->conn->trans->peer->net_error;
725 else
726 ret = -ECONNABORTED;
727 _leave(" = %d", ret);
728 return ret;
/* an error after some data was already copied is reported as a short write */
730 maybe_error:
731 if (copied)
732 goto success;
733 goto out;
735 efault:
736 ret = -EFAULT;
737 goto out;