1 /* RxRPC packet transmission
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 #include <linux/net.h>
15 #include <linux/gfp.h>
16 #include <linux/skbuff.h>
17 #include <linux/export.h>
19 #include <net/af_rxrpc.h>
20 #include "ar-internal.h"
// Image of an outgoing ACK packet: wire header, ACK body, then the
// trailing ackinfo record.
// NOTE(review): the ACK sender in this file transmits
// sizeof(whdr) + sizeof(ack) + n bytes starting at the beginning of this
// struct and sends ackinfo as a separate iovec, and the fill-out routine
// writes bytes through an "ackp" cursor. That implies a per-packet ACK slot
// area between ack and ackinfo which is not visible in these lines -
// confirm against the complete file before relying on this layout.
22 struct rxrpc_ack_buffer
{
// Wire header placed at the start of the datagram.
23 struct rxrpc_wire_header whdr
;
// Fixed-size ACK body (bufferSpace, maxSkew, firstPacket, ...).
24 struct rxrpc_ackpacket ack
;
// Trailer carrying rxMTU, maxMTU, rwind and jumbo_max.
27 struct rxrpc_ackinfo ackinfo
;
// Image of an outgoing ABORT packet; it is sent as a single iovec of
// sizeof(struct rxrpc_abort_buffer) bytes by rxrpc_send_abort_packet().
// NOTE(review): that function also assigns pkt.abort_code = htonl(...),
// but no abort_code member is visible in these lines - the member
// declaration appears to be missing from this view; confirm.
30 struct rxrpc_abort_buffer
{
// Wire header placed at the start of the datagram.
31 struct rxrpc_wire_header whdr
;
// Payload appended after the wire header in keepalive VERSION packets (see
// rxrpc_send_keepalive()). The literal is empty, so sizeof() of this array
// is 1 and exactly one NUL byte is transmitted.
35 static const char rxrpc_keepalive_string
[] = "";
38 * Arrange for a keepalive ping a certain time after we last transmitted. This
39 * lets the far side know we're still interested in this call and helps keep
40 * the route through any intervening firewall open.
42 * Receiving a response to the ping will prevent the ->expect_rx_by timer from expiring.
// Record a keepalive deadline on the call and hand it to the call timer.
//
// call: the call whose ->keepalive_at is updated.
//
// The deadline is derived from one sixth of call->next_rx_timo, published
// with WRITE_ONCE() and then passed, together with the current jiffies
// value, to rxrpc_reduce_call_timer() tagged as
// rxrpc_timer_set_for_keepalive.
// NOTE(review): in the lines visible here "now" is never added to
// keepalive_at, whereas expect_rx_by elsewhere in this file is computed as
// nowj + next_rx_timo. Either a line is missing from this view or the
// value is a relative interval - confirm before changing anything.
45 static void rxrpc_set_keepalive(struct rxrpc_call
*call
)
// Sample jiffies once so the stored deadline and the timer call agree.
47 unsigned long now
= jiffies
, keepalive_at
= call
->next_rx_timo
/ 6;
// Publish the deadline on the call.
50 WRITE_ONCE(call
->keepalive_at
, keepalive_at
);
51 rxrpc_reduce_call_timer(call
, keepalive_at
, now
,
52 rxrpc_timer_set_for_keepalive
);
56 * Fill out an ACK packet.
// Populate the ACK body and the ackinfo trailer of *pkt from the call's
// current receive state.
//
// conn:      connection; its peer supplies if_mtu and hdrsize.
// call:      call being acknowledged.
// pkt:       ACK buffer to fill in (the wire header type and most header
//            fields are set by the caller; only whdr.flags is touched here).
// _hard_ack: out - receives the rx_hard_ack snapshot used for this ACK.
//
// Returns top - hard_ack + 3; the caller adds this to the sizes of the wire
// header and ACK body to get the length of the first iovec.
//
// NOTE(review): the call site passes six arguments
// (conn, call, pkt, &hard_ack, &top, reason), but the remaining parameter
// declarations and the locals ix, mtu, jmax and ackp are not visible in
// these lines - confirm against the complete file.
58 static size_t rxrpc_fill_out_ack(struct rxrpc_connection
*conn
,
59 struct rxrpc_call
*call
,
60 struct rxrpc_ack_buffer
*pkt
,
61 rxrpc_seq_t
*_hard_ack
,
65 rxrpc_serial_t serial
;
66 rxrpc_seq_t hard_ack
, top
, seq
;
// Snapshot the receive window. rx_top is read with acquire semantics and
// rx_hard_ack with READ_ONCE(), so each is loaded exactly once here.
71 /* Barrier against rxrpc_input_data(). */
72 serial
= call
->ackr_serial
;
73 hard_ack
= READ_ONCE(call
->rx_hard_ack
);
74 top
= smp_load_acquire(&call
->rx_top
);
75 *_hard_ack
= hard_ack
;
// Fixed ACK fields, converted to network byte order where multi-byte.
// bufferSpace is the constant 8; firstPacket is the first sequence number
// after the hard-ACK point; nAcks is the count of slots up to rx_top.
78 pkt
->ack
.bufferSpace
= htons(8);
79 pkt
->ack
.maxSkew
= htons(call
->ackr_skew
);
80 pkt
->ack
.firstPacket
= htonl(hard_ack
+ 1);
81 pkt
->ack
.previousPacket
= htonl(call
->ackr_prev_seq
);
82 pkt
->ack
.serial
= htonl(serial
);
83 pkt
->ack
.reason
= reason
;
84 pkt
->ack
.nAcks
= top
- hard_ack
;
// A ping ACK additionally sets the request-ACK flag in the wire header.
86 if (reason
== RXRPC_ACK_PING
)
87 pkt
->whdr
.flags
|= RXRPC_REQUEST_ACK
;
// When there are packets beyond the hard-ACK point, emit one byte per
// sequence number: the slot index is seq masked into the ring, and an
// occupied ring slot is reported as ACK. The NACK store is presumably the
// else branch - the loop header, the else and the seq increment are not
// visible in these lines.
89 if (after(top
, hard_ack
)) {
92 ix
= seq
& RXRPC_RXTX_BUFF_MASK
;
93 if (call
->rxtx_buffer
[ix
])
94 *ackp
++ = RXRPC_ACK_TYPE_ACK
;
96 *ackp
++ = RXRPC_ACK_TYPE_NACK
;
98 } while (before_eq(seq
, top
));
// Trailer: advertised maxMTU is the peer's interface MTU minus its header
// size. jumbo_max drops to 1 once more than 3 bad jumbo packets have been
// counted on this call, otherwise the rxrpc_rx_jumbo_max tunable is used.
101 mtu
= conn
->params
.peer
->if_mtu
;
102 mtu
-= conn
->params
.peer
->hdrsize
;
103 jmax
= (call
->nr_jumbo_bad
> 3) ? 1 : rxrpc_rx_jumbo_max
;
104 pkt
->ackinfo
.rxMTU
= htonl(rxrpc_rx_mtu
);
105 pkt
->ackinfo
.maxMTU
= htonl(mtu
);
106 pkt
->ackinfo
.rwind
= htonl(call
->rx_winsize
);
107 pkt
->ackinfo
.jumbo_max
= htonl(jmax
);
// Number of bytes following the ACK body in the first iovec: one per
// slot plus 3 (presumably padding written just before this return - those
// stores are not visible here).
112 return top
- hard_ack
+ 3;
116 * Send an ACK call packet.
// Build and transmit an ACK packet for a call.
//
// call:    the call to acknowledge.
// ping:    selects a ping ACK; the ping bookkeeping below (ping_serial,
//          ping_time, RXRPC_CALL_PINGING) belongs to that case.
// _serial: out parameter for the serial number - its use is not visible in
//          these lines.
//
// Outline of what the visible code does:
//  1. take a reference on call->conn under call->lock;
//  2. allocate a zeroed rxrpc_ack_buffer;
//  3. fill in the wire header and, under call->lock, the ACK body;
//  4. send header+ACK and the ackinfo trailer as two iovecs;
//  5. on the way out, either re-propose the ACK or advance
//     ackr_consumed / ackr_seen, re-arm the keepalive and drop the
//     connection reference.
// NOTE(review): the declarations of msg, iov, len, ret, n, reason and now,
// most branch conditions, the error labels and the return statements are
// not visible in this view.
118 int rxrpc_send_ack_packet(struct rxrpc_call
*call
, bool ping
,
119 rxrpc_serial_t
*_serial
)
121 struct rxrpc_connection
*conn
= NULL
;
122 struct rxrpc_ack_buffer
*pkt
;
125 rxrpc_serial_t serial
;
126 rxrpc_seq_t hard_ack
, top
;
// Pin the connection while holding the call lock so call->conn is read
// consistently; the "maybe" getter may decline to hand out a reference.
132 spin_lock_bh(&call
->lock
);
134 conn
= rxrpc_get_connection_maybe(call
->conn
);
135 spin_unlock_bh(&call
->lock
);
// Zeroed heap buffer for the packet image. The put that follows is
// presumably the allocation-failure path (the surrounding test is not
// visible here).
139 pkt
= kzalloc(sizeof(*pkt
), GFP_KERNEL
);
141 rxrpc_put_connection(conn
);
// Destination is the peer's transport address; no control messages.
145 msg
.msg_name
= &call
->peer
->srx
.transport
;
146 msg
.msg_namelen
= call
->peer
->srx
.transport_len
;
147 msg
.msg_control
= NULL
;
148 msg
.msg_controllen
= 0;
// Wire header: identifiers in network byte order, type ACK, flags start as
// slow-start-OK combined with the connection's client flag.
151 pkt
->whdr
.epoch
= htonl(conn
->proto
.epoch
);
152 pkt
->whdr
.cid
= htonl(call
->cid
);
153 pkt
->whdr
.callNumber
= htonl(call
->call_id
);
155 pkt
->whdr
.type
= RXRPC_PACKET_TYPE_ACK
;
156 pkt
->whdr
.flags
= RXRPC_SLOW_START_OK
| conn
->out_clientflag
;
157 pkt
->whdr
.userStatus
= 0;
158 pkt
->whdr
.securityIndex
= call
->security_ix
;
160 pkt
->whdr
.serviceId
= htons(call
->service_id
);
// Choose the ACK reason under the call lock: either PING or the pending
// call->ackr_reason. When no reason is pending the lock is dropped
// (presumably followed by an early exit). The pending reason is cleared
// once consumed, then the ACK body is filled in while still locked.
162 spin_lock_bh(&call
->lock
);
164 reason
= RXRPC_ACK_PING
;
166 reason
= call
->ackr_reason
;
167 if (!call
->ackr_reason
) {
168 spin_unlock_bh(&call
->lock
);
172 call
->ackr_reason
= 0;
174 n
= rxrpc_fill_out_ack(conn
, call
, pkt
, &hard_ack
, &top
, reason
);
176 spin_unlock_bh(&call
->lock
);
// iovec 0 covers the wire header, the ACK body and the n bytes reported by
// rxrpc_fill_out_ack(); iovec 1 is the ackinfo trailer.
178 iov
[0].iov_base
= pkt
;
179 iov
[0].iov_len
= sizeof(pkt
->whdr
) + sizeof(pkt
->ack
) + n
;
180 iov
[1].iov_base
= &pkt
->ackinfo
;
181 iov
[1].iov_len
= sizeof(pkt
->ackinfo
);
182 len
= iov
[0].iov_len
+ iov
[1].iov_len
;
// Allocate the next serial number on the connection and stamp the header.
184 serial
= atomic_inc_return(&conn
->serial
);
185 pkt
->whdr
.serial
= htonl(serial
);
186 trace_rxrpc_tx_ack(call
, serial
,
187 ntohl(pkt
->ack
.firstPacket
),
188 ntohl(pkt
->ack
.serial
),
189 pkt
->ack
.reason
, pkt
->ack
.nAcks
);
// Ping bookkeeping: remember which serial carries the ping.
194 call
->ping_serial
= serial
;
196 /* We need to stick a time in before we send the packet in case
197 * the reply gets back before kernel_sendmsg() completes - but
198 * asking UDP to send the packet can take a relatively long
199 * time, so we update the time after, on the assumption that
200 * the packet transmission is more likely to happen towards the
201 * end of the kernel_sendmsg() call.
203 call
->ping_time
= ktime_get_real();
204 set_bit(RXRPC_CALL_PINGING
, &call
->flags
);
205 trace_rxrpc_rtt_tx(call
, rxrpc_rtt_tx_ping
, serial
);
// Transmit, then refresh ping_time with the post-send timestamp and note
// the last transmission time on the peer.
208 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
, iov
, 2, len
);
209 now
= ktime_get_real();
211 call
->ping_time
= now
;
212 conn
->params
.peer
->last_tx_at
= ktime_get_real();
214 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
215 rxrpc_tx_fail_call_ack
);
// Post-send handling applies only while the call has not completed.
217 if (call
->state
< RXRPC_CALL_COMPLETE
) {
// Retry path: withdraw the pinging mark and re-propose the same ACK,
// tagged rxrpc_propose_ack_retry_tx (presumably taken when the send
// failed - the condition is not visible here).
220 clear_bit(RXRPC_CALL_PINGING
, &call
->flags
);
221 rxrpc_propose_ACK(call
, pkt
->ack
.reason
,
222 ntohs(pkt
->ack
.maxSkew
),
223 ntohl(pkt
->ack
.serial
),
225 rxrpc_propose_ack_retry_tx
);
// Other path: advance the consumed/seen markers, never moving them
// backwards (after() is a wrap-safe sequence comparison).
227 spin_lock_bh(&call
->lock
);
228 if (after(hard_ack
, call
->ackr_consumed
))
229 call
->ackr_consumed
= hard_ack
;
230 if (after(top
, call
->ackr_seen
))
231 call
->ackr_seen
= top
;
232 spin_unlock_bh(&call
->lock
);
// Having transmitted, push the keepalive deadline out again.
235 rxrpc_set_keepalive(call
);
// Drop the reference taken at the top of the function.
239 rxrpc_put_connection(conn
);
245 * Send an ABORT call packet.
// Build and transmit an ABORT packet carrying call->abort_code.
//
// call: the call being aborted.
//
// The packet image lives on the stack and is sent as a single iovec. A
// connection reference is taken under call->lock and dropped at the end.
// NOTE(review): the declarations of msg, iov and ret, the early-exit
// statements and the return statements are not visible in this view.
247 int rxrpc_send_abort_packet(struct rxrpc_call
*call
)
249 struct rxrpc_connection
*conn
= NULL
;
250 struct rxrpc_abort_buffer pkt
;
253 rxrpc_serial_t serial
;
256 /* Don't bother sending aborts for a client call once the server has
257 * hard-ACK'd all of its request data. After that point, we're not
258 * going to stop the operation proceeding, and whilst we might limit
259 * the reply, it's not worth it if we can send a new call on the same
260 * channel instead, thereby closing off this call.
// The test below implements the rule described above: client call with
// RXRPC_CALL_TX_LAST set (the statement it guards is not visible here).
262 if (rxrpc_is_client_call(call
) &&
263 test_bit(RXRPC_CALL_TX_LAST
, &call
->flags
))
// Pin the connection while holding the call lock.
266 spin_lock_bh(&call
->lock
);
268 conn
= rxrpc_get_connection_maybe(call
->conn
);
269 spin_unlock_bh(&call
->lock
);
// Destination is the peer's transport address; no control messages.
273 msg
.msg_name
= &call
->peer
->srx
.transport
;
274 msg
.msg_namelen
= call
->peer
->srx
.transport_len
;
275 msg
.msg_control
= NULL
;
276 msg
.msg_controllen
= 0;
// Wire header: type ABORT, flags carry only the connection's client flag.
279 pkt
.whdr
.epoch
= htonl(conn
->proto
.epoch
);
280 pkt
.whdr
.cid
= htonl(call
->cid
);
281 pkt
.whdr
.callNumber
= htonl(call
->call_id
);
283 pkt
.whdr
.type
= RXRPC_PACKET_TYPE_ABORT
;
284 pkt
.whdr
.flags
= conn
->out_clientflag
;
285 pkt
.whdr
.userStatus
= 0;
286 pkt
.whdr
.securityIndex
= call
->security_ix
;
288 pkt
.whdr
.serviceId
= htons(call
->service_id
);
// Abort code goes out in network byte order.
289 pkt
.abort_code
= htonl(call
->abort_code
);
291 iov
[0].iov_base
= &pkt
;
292 iov
[0].iov_len
= sizeof(pkt
);
// Allocate the next serial number on the connection and stamp the header.
294 serial
= atomic_inc_return(&conn
->serial
);
295 pkt
.whdr
.serial
= htonl(serial
);
// Transmit and note the last transmission time on the peer.
297 ret
= kernel_sendmsg(conn
->params
.local
->socket
,
298 &msg
, iov
, 1, sizeof(pkt
));
299 conn
->params
.peer
->last_tx_at
= ktime_get_real();
301 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
302 rxrpc_tx_fail_call_abort
);
// Drop the reference taken above.
305 rxrpc_put_connection(conn
);
310 * send a packet through the transport endpoint
// Transmit one DATA packet for a call.
//
// call: the call the packet belongs to; call->conn is used directly.
// skb:  buffer holding the payload; its rxrpc private area (sp) supplies
//       the sequence number, flags and reserved field for the header.
//
// Outline of what the visible code does:
//  1. allocate a fresh serial number and build the wire header on the
//     stack;
//  2. decide whether to set RXRPC_REQUEST_ACK;
//  3. send with the local endpoint's defrag_sem held for read, or fall
//     through to the "send_fragmentable" path, which holds defrag_sem for
//     write while it switches path-MTU discovery off, sends, and switches
//     it back on;
//  4. after sending, record the serial in the skb, update RTT and lost-ACK
//     bookkeeping, start the expect-rx timer for the first packet and
//     re-arm the keepalive.
// NOTE(review): the third parameter, the declarations of msg, iov, len,
// ret, opt and lose, several conditions, the labels and the return
// statements are not visible in this view.
312 int rxrpc_send_data_packet(struct rxrpc_call
*call
, struct sk_buff
*skb
,
315 struct rxrpc_connection
*conn
= call
->conn
;
316 struct rxrpc_wire_header whdr
;
317 struct rxrpc_skb_priv
*sp
= rxrpc_skb(skb
);
320 rxrpc_serial_t serial
;
325 _enter(",{%d}", skb
->len
);
327 /* Each transmission of a Tx packet needs a new serial number */
328 serial
= atomic_inc_return(&conn
->serial
);
// Wire header: identifiers and sequence in network byte order; flags are
// copied from the skb's saved header and may gain REQUEST_ACK below.
330 whdr
.epoch
= htonl(conn
->proto
.epoch
);
331 whdr
.cid
= htonl(call
->cid
);
332 whdr
.callNumber
= htonl(call
->call_id
);
333 whdr
.seq
= htonl(sp
->hdr
.seq
);
334 whdr
.serial
= htonl(serial
);
335 whdr
.type
= RXRPC_PACKET_TYPE_DATA
;
336 whdr
.flags
= sp
->hdr
.flags
;
338 whdr
.securityIndex
= call
->security_ix
;
339 whdr
._rsvd
= htons(sp
->hdr
._rsvd
);
340 whdr
.serviceId
= htons(call
->service_id
);
// While the connection is probing for a service upgrade, userStatus
// carries the upgrade request (the second half of the condition is not
// visible here).
342 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE
, &conn
->flags
) &&
344 whdr
.userStatus
= RXRPC_USERSTATUS_SERVICE_UPGRADE
;
// iovec 0 is the header built above; iovec 1 is the payload taken from
// skb->head for skb->len bytes.
346 iov
[0].iov_base
= &whdr
;
347 iov
[0].iov_len
= sizeof(whdr
);
348 iov
[1].iov_base
= skb
->head
;
349 iov
[1].iov_len
= skb
->len
;
350 len
= iov
[0].iov_len
+ iov
[1].iov_len
;
// Destination is the peer's transport address; no control messages.
352 msg
.msg_name
= &call
->peer
->srx
.transport
;
353 msg
.msg_namelen
= call
->peer
->srx
.transport_len
;
354 msg
.msg_control
= NULL
;
355 msg
.msg_controllen
= 0;
358 /* If our RTT cache needs working on, request an ACK. Also request
359 * ACKs if a DATA packet appears to have been lost.
// Never on the last packet. Otherwise any one of the listed triggers is
// enough: a pending ACK-lost event (consumed by the test), slow-start
// congestion mode, fewer than 3 RTT samples on an odd sequence number, or
// the time-based test on rtt_last_req plus 1000 ms (its second operand is
// not visible here).
361 if (!(sp
->hdr
.flags
& RXRPC_LAST_PACKET
) &&
362 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST
, &call
->events
) ||
364 call
->cong_mode
== RXRPC_CALL_SLOW_START
||
365 (call
->peer
->rtt_usage
< 3 && sp
->hdr
.seq
& 1) ||
366 ktime_before(ktime_add_ms(call
->peer
->rtt_last_req
, 1000),
368 whdr
.flags
|= RXRPC_REQUEST_ACK
;
// Optional loss injection for testing: with the config option enabled,
// every eighth pass through here takes the inner branch (its body is not
// visible in this view).
370 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS
)) {
372 if ((lose
++ & 7) == 7) {
379 _proto("Tx DATA %%%u { #%u }", serial
, sp
->hdr
.seq
);
381 /* send the packet with the don't fragment bit set if we currently
382 * think it's small enough */
// Payloads at or above the peer's maxdata go straight to the
// fragmentable path.
383 if (iov
[1].iov_len
>= call
->peer
->maxdata
)
384 goto send_fragmentable
;
// Shared hold on defrag_sem: the fragmentable path takes it for write
// while it toggles the MTU-discovery socket option.
386 down_read(&conn
->params
.local
->defrag_sem
);
387 /* send the packet by UDP
388 * - returns -EMSGSIZE if UDP would have to fragment the packet
389 * to go out of the interface
390 * - in which case, we'll have processed the ICMP error
391 * message and update the peer record
393 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
, iov
, 2, len
);
394 conn
->params
.peer
->last_tx_at
= ktime_get_real();
396 up_read(&conn
->params
.local
->defrag_sem
);
398 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
399 rxrpc_tx_fail_call_data_nofrag
);
// Too big for the path with don't-fragment set: retry allowing
// fragmentation.
400 if (ret
== -EMSGSIZE
)
401 goto send_fragmentable
;
// Post-send bookkeeping (presumably the common "done" point reached from
// both send paths - the label is not visible here).
404 trace_rxrpc_tx_data(call
, sp
->hdr
.seq
, serial
, whdr
.flags
,
407 ktime_t now
= ktime_get_real();
// Remember which serial this transmission used.
410 sp
->hdr
.serial
= serial
;
// When an ACK was requested, note the request time for RTT purposes and,
// once more than one RTT sample exists, set a lost-ACK deadline derived
// from twice the peer RTT converted to jiffies.
// NOTE(review): no addition of nowj to ack_lost_at is visible in these
// lines - confirm against the complete file.
411 if (whdr
.flags
& RXRPC_REQUEST_ACK
) {
412 call
->peer
->rtt_last_req
= now
;
413 trace_rxrpc_rtt_tx(call
, rxrpc_rtt_tx_data
, serial
);
414 if (call
->peer
->rtt_usage
> 1) {
415 unsigned long nowj
= jiffies
, ack_lost_at
;
417 ack_lost_at
= nsecs_to_jiffies(2 * call
->peer
->rtt
);
422 WRITE_ONCE(call
->ack_lost_at
, ack_lost_at
);
423 rxrpc_reduce_call_timer(call
, ack_lost_at
, nowj
,
424 rxrpc_timer_set_for_lost_ack
);
// First DATA packet of the call: start the expect-rx timer exactly once
// (test_and_set_bit guards against repeats), expiring next_rx_timo jiffies
// from now.
428 if (sp
->hdr
.seq
== 1 &&
429 !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER
,
431 unsigned long nowj
= jiffies
, expect_rx_by
;
433 expect_rx_by
= nowj
+ call
->next_rx_timo
;
434 WRITE_ONCE(call
->expect_rx_by
, expect_rx_by
);
435 rxrpc_reduce_call_timer(call
, expect_rx_by
, nowj
,
436 rxrpc_timer_set_for_normal
);
// Having transmitted, push the keepalive deadline out again.
440 rxrpc_set_keepalive(call
);
442 _leave(" = %d [%u]", ret
, call
->peer
->maxdata
);
446 /* attempt to send this message with fragmentation enabled */
447 _debug("send fragment");
// Exclusive hold on defrag_sem: the socket option change below affects
// every sender on this local endpoint's socket.
449 down_write(&conn
->params
.local
->defrag_sem
);
// Per address family: turn path-MTU discovery off, send, turn it back on.
// The case labels and the test on the first setsockopt result are not
// visible in this view.
451 switch (conn
->params
.local
->srx
.transport
.family
) {
453 opt
= IP_PMTUDISC_DONT
;
454 ret
= kernel_setsockopt(conn
->params
.local
->socket
,
455 SOL_IP
, IP_MTU_DISCOVER
,
456 (char *)&opt
, sizeof(opt
));
458 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
,
460 conn
->params
.peer
->last_tx_at
= ktime_get_real();
// Restore discovery; the result of this call is not checked.
462 opt
= IP_PMTUDISC_DO
;
463 kernel_setsockopt(conn
->params
.local
->socket
, SOL_IP
,
465 (char *)&opt
, sizeof(opt
));
// IPv6 variant of the same sequence, compiled only with IPv6 support.
469 #ifdef CONFIG_AF_RXRPC_IPV6
471 opt
= IPV6_PMTUDISC_DONT
;
472 ret
= kernel_setsockopt(conn
->params
.local
->socket
,
473 SOL_IPV6
, IPV6_MTU_DISCOVER
,
474 (char *)&opt
, sizeof(opt
));
476 ret
= kernel_sendmsg(conn
->params
.local
->socket
, &msg
,
478 conn
->params
.peer
->last_tx_at
= ktime_get_real();
// Restore discovery; the result of this call is not checked.
480 opt
= IPV6_PMTUDISC_DO
;
481 kernel_setsockopt(conn
->params
.local
->socket
,
482 SOL_IPV6
, IPV6_MTU_DISCOVER
,
483 (char *)&opt
, sizeof(opt
));
490 trace_rxrpc_tx_fail(call
->debug_id
, serial
, ret
,
491 rxrpc_tx_fail_call_data_frag
);
493 up_write(&conn
->params
.local
->defrag_sem
);
498 * reject packets through the local endpoint
// Drain the local endpoint's reject queue, answering each queued packet
// with an ABORT sent back to its source address.
//
// local: endpoint whose ->reject_queue is emptied and whose socket is used
//        for the replies.
//
// The header and message descriptor are prepared once outside the loop;
// only the per-packet fields are rewritten for each skb. Every dequeued
// skb is released with rxrpc_free_skb() at the end of its iteration.
// NOTE(review): the declarations of msg, iov, skb, code, size and ret are
// not visible in this view, and neither is any assignment to sp before it
// is dereferenced - confirm against the complete file.
500 void rxrpc_reject_packets(struct rxrpc_local
*local
)
502 struct sockaddr_rxrpc srx
;
503 struct rxrpc_skb_priv
*sp
;
504 struct rxrpc_wire_header whdr
;
512 _enter("%d", local
->debug_id
);
// Reply layout: wire header followed by the abort code.
514 iov
[0].iov_base
= &whdr
;
515 iov
[0].iov_len
= sizeof(whdr
);
516 iov
[1].iov_base
= &code
;
517 iov
[1].iov_len
= sizeof(code
);
518 size
= sizeof(whdr
) + sizeof(code
);
// msg_name points at srx.transport, which is refilled for each packet.
520 msg
.msg_name
= &srx
.transport
;
521 msg
.msg_control
= NULL
;
522 msg
.msg_controllen
= 0;
// Start from an all-zero header so fields not set below stay zero.
525 memset(&whdr
, 0, sizeof(whdr
));
526 whdr
.type
= RXRPC_PACKET_TYPE_ABORT
;
528 while ((skb
= skb_dequeue(&local
->reject_queue
))) {
529 rxrpc_see_skb(skb
, rxrpc_skb_rx_seen
);
// Reply only when the source address can be extracted (a return of 0).
532 if (rxrpc_extract_addr_from_skb(local
, &srx
, skb
) == 0) {
533 msg
.msg_namelen
= srx
.transport_len
;
// The abort code to send is carried in skb->priority.
535 code
= htonl(skb
->priority
);
// Echo the offending packet's identifiers back to the sender.
537 whdr
.epoch
= htonl(sp
->hdr
.epoch
);
538 whdr
.cid
= htonl(sp
->hdr
.cid
);
539 whdr
.callNumber
= htonl(sp
->hdr
.callNumber
);
540 whdr
.serviceId
= htons(sp
->hdr
.serviceId
);
// Flip the client-initiated bit and then mask off every other flag, so
// the reply carries only the inverted direction bit.
541 whdr
.flags
= sp
->hdr
.flags
;
542 whdr
.flags
^= RXRPC_CLIENT_INITIATED
;
543 whdr
.flags
&= RXRPC_CLIENT_INITIATED
;
545 ret
= kernel_sendmsg(local
->socket
, &msg
, iov
, 2, size
);
// Serial is reported as 0 in the failure trace for rejects.
547 trace_rxrpc_tx_fail(local
->debug_id
, 0, ret
,
548 rxrpc_tx_fail_reject
);
551 rxrpc_free_skb(skb
, rxrpc_skb_rx_freed
);
558 * Send a VERSION reply to a peer as a keepalive.
// Send a VERSION packet to a peer as a keepalive.
//
// peer: destination; its local endpoint's socket is used for the send and
//       its ->last_tx_at is refreshed afterwards.
//
// The packet is the wire header followed by rxrpc_keepalive_string, sent as
// two iovecs.
// NOTE(review): the declarations of msg, iov, len and ret and the
// assignments to the remaining header fields are not visible in this view.
560 void rxrpc_send_keepalive(struct rxrpc_peer
*peer
)
562 struct rxrpc_wire_header whdr
;
// Destination is the peer's transport address; no control messages.
570 msg
.msg_name
= &peer
->srx
.transport
;
571 msg
.msg_namelen
= peer
->srx
.transport_len
;
572 msg
.msg_control
= NULL
;
573 msg
.msg_controllen
= 0;
// The epoch comes from the network namespace state of the peer's local
// endpoint rather than from a connection.
576 whdr
.epoch
= htonl(peer
->local
->rxnet
->epoch
);
581 whdr
.type
= RXRPC_PACKET_TYPE_VERSION
; /* Not client-initiated */
582 whdr
.flags
= RXRPC_LAST_PACKET
;
584 whdr
.securityIndex
= 0;
// Header plus the keepalive payload; sizeof() counts the string's NUL.
588 iov
[0].iov_base
= &whdr
;
589 iov
[0].iov_len
= sizeof(whdr
);
590 iov
[1].iov_base
= (char *)rxrpc_keepalive_string
;
591 iov
[1].iov_len
= sizeof(rxrpc_keepalive_string
);
593 len
= iov
[0].iov_len
+ iov
[1].iov_len
;
595 _proto("Tx VERSION (keepalive)");
597 ret
= kernel_sendmsg(peer
->local
->socket
, &msg
, iov
, 2, len
);
// Serial is reported as 0 in the failure trace for keepalives.
599 trace_rxrpc_tx_fail(peer
->debug_id
, 0, ret
,
600 rxrpc_tx_fail_version_keepalive
);
// Note the transmission time on the peer.
602 peer
->last_tx_at
= ktime_get_real();