4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
39 DEFINE_SNMP_STAT(struct dccp_mib
, dccp_statistics
) __read_mostly
;
41 EXPORT_SYMBOL_GPL(dccp_statistics
);
43 atomic_t dccp_orphan_count
= ATOMIC_INIT(0);
45 EXPORT_SYMBOL_GPL(dccp_orphan_count
);
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo
= {
48 .lhash_lock
= RW_LOCK_UNLOCKED
,
49 .lhash_users
= ATOMIC_INIT(0),
50 .lhash_wait
= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo
.lhash_wait
),
53 EXPORT_SYMBOL_GPL(dccp_hashinfo
);
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly
= 5;
58 void dccp_set_state(struct sock
*sk
, const int state
)
60 const int oldstate
= sk
->sk_state
;
62 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk
), sk
,
63 dccp_state_name(oldstate
), dccp_state_name(state
));
64 WARN_ON(state
== oldstate
);
68 if (oldstate
!= DCCP_OPEN
)
69 DCCP_INC_STATS(DCCP_MIB_CURRESTAB
);
70 /* Client retransmits all Confirm options until entering OPEN */
71 if (oldstate
== DCCP_PARTOPEN
)
72 dccp_feat_list_purge(&dccp_sk(sk
)->dccps_featneg
);
76 if (oldstate
== DCCP_OPEN
|| oldstate
== DCCP_ACTIVE_CLOSEREQ
||
77 oldstate
== DCCP_CLOSING
)
78 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS
);
80 sk
->sk_prot
->unhash(sk
);
81 if (inet_csk(sk
)->icsk_bind_hash
!= NULL
&&
82 !(sk
->sk_userlocks
& SOCK_BINDPORT_LOCK
))
86 if (oldstate
== DCCP_OPEN
)
87 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB
);
90 /* Change state AFTER socket is unhashed to avoid closed
91 * socket sitting in hash tables.
96 EXPORT_SYMBOL_GPL(dccp_set_state
);
98 static void dccp_finish_passive_close(struct sock
*sk
)
100 switch (sk
->sk_state
) {
101 case DCCP_PASSIVE_CLOSE
:
102 /* Node (client or server) has received Close packet. */
103 dccp_send_reset(sk
, DCCP_RESET_CODE_CLOSED
);
104 dccp_set_state(sk
, DCCP_CLOSED
);
106 case DCCP_PASSIVE_CLOSEREQ
:
108 * Client received CloseReq. We set the `active' flag so that
109 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
111 dccp_send_close(sk
, 1);
112 dccp_set_state(sk
, DCCP_CLOSING
);
116 void dccp_done(struct sock
*sk
)
118 dccp_set_state(sk
, DCCP_CLOSED
);
119 dccp_clear_xmit_timers(sk
);
121 sk
->sk_shutdown
= SHUTDOWN_MASK
;
123 if (!sock_flag(sk
, SOCK_DEAD
))
124 sk
->sk_state_change(sk
);
126 inet_csk_destroy_sock(sk
);
129 EXPORT_SYMBOL_GPL(dccp_done
);
131 const char *dccp_packet_name(const int type
)
133 static const char *dccp_packet_names
[] = {
134 [DCCP_PKT_REQUEST
] = "REQUEST",
135 [DCCP_PKT_RESPONSE
] = "RESPONSE",
136 [DCCP_PKT_DATA
] = "DATA",
137 [DCCP_PKT_ACK
] = "ACK",
138 [DCCP_PKT_DATAACK
] = "DATAACK",
139 [DCCP_PKT_CLOSEREQ
] = "CLOSEREQ",
140 [DCCP_PKT_CLOSE
] = "CLOSE",
141 [DCCP_PKT_RESET
] = "RESET",
142 [DCCP_PKT_SYNC
] = "SYNC",
143 [DCCP_PKT_SYNCACK
] = "SYNCACK",
146 if (type
>= DCCP_NR_PKT_TYPES
)
149 return dccp_packet_names
[type
];
152 EXPORT_SYMBOL_GPL(dccp_packet_name
);
154 const char *dccp_state_name(const int state
)
156 static char *dccp_state_names
[] = {
157 [DCCP_OPEN
] = "OPEN",
158 [DCCP_REQUESTING
] = "REQUESTING",
159 [DCCP_PARTOPEN
] = "PARTOPEN",
160 [DCCP_LISTEN
] = "LISTEN",
161 [DCCP_RESPOND
] = "RESPOND",
162 [DCCP_CLOSING
] = "CLOSING",
163 [DCCP_ACTIVE_CLOSEREQ
] = "CLOSEREQ",
164 [DCCP_PASSIVE_CLOSE
] = "PASSIVE_CLOSE",
165 [DCCP_PASSIVE_CLOSEREQ
] = "PASSIVE_CLOSEREQ",
166 [DCCP_TIME_WAIT
] = "TIME_WAIT",
167 [DCCP_CLOSED
] = "CLOSED",
170 if (state
>= DCCP_MAX_STATES
)
171 return "INVALID STATE!";
173 return dccp_state_names
[state
];
176 EXPORT_SYMBOL_GPL(dccp_state_name
);
178 int dccp_init_sock(struct sock
*sk
, const __u8 ctl_sock_initialized
)
180 struct dccp_sock
*dp
= dccp_sk(sk
);
181 struct inet_connection_sock
*icsk
= inet_csk(sk
);
183 icsk
->icsk_rto
= DCCP_TIMEOUT_INIT
;
184 icsk
->icsk_syn_retries
= sysctl_dccp_request_retries
;
185 sk
->sk_state
= DCCP_CLOSED
;
186 sk
->sk_write_space
= dccp_write_space
;
187 icsk
->icsk_sync_mss
= dccp_sync_mss
;
188 dp
->dccps_mss_cache
= TCP_MIN_RCVMSS
;
189 dp
->dccps_rate_last
= jiffies
;
190 dp
->dccps_role
= DCCP_ROLE_UNDEFINED
;
191 dp
->dccps_service
= DCCP_SERVICE_CODE_IS_ABSENT
;
192 dp
->dccps_tx_qlen
= sysctl_dccp_tx_qlen
;
194 dccp_init_xmit_timers(sk
);
196 INIT_LIST_HEAD(&dp
->dccps_featneg
);
197 /* control socket doesn't need feat nego */
198 if (likely(ctl_sock_initialized
))
199 return dccp_feat_init(sk
);
203 EXPORT_SYMBOL_GPL(dccp_init_sock
);
205 void dccp_destroy_sock(struct sock
*sk
)
207 struct dccp_sock
*dp
= dccp_sk(sk
);
210 * DCCP doesn't use sk_write_queue, just sk_send_head
211 * for retransmissions
213 if (sk
->sk_send_head
!= NULL
) {
214 kfree_skb(sk
->sk_send_head
);
215 sk
->sk_send_head
= NULL
;
218 /* Clean up a referenced DCCP bind bucket. */
219 if (inet_csk(sk
)->icsk_bind_hash
!= NULL
)
222 kfree(dp
->dccps_service_list
);
223 dp
->dccps_service_list
= NULL
;
225 if (dp
->dccps_hc_rx_ackvec
!= NULL
) {
226 dccp_ackvec_free(dp
->dccps_hc_rx_ackvec
);
227 dp
->dccps_hc_rx_ackvec
= NULL
;
229 ccid_hc_rx_delete(dp
->dccps_hc_rx_ccid
, sk
);
230 ccid_hc_tx_delete(dp
->dccps_hc_tx_ccid
, sk
);
231 dp
->dccps_hc_rx_ccid
= dp
->dccps_hc_tx_ccid
= NULL
;
233 /* clean up feature negotiation state */
234 dccp_feat_list_purge(&dp
->dccps_featneg
);
237 EXPORT_SYMBOL_GPL(dccp_destroy_sock
);
239 static inline int dccp_listen_start(struct sock
*sk
, int backlog
)
241 struct dccp_sock
*dp
= dccp_sk(sk
);
243 dp
->dccps_role
= DCCP_ROLE_LISTEN
;
244 /* do not start to listen if feature negotiation setup fails */
245 if (dccp_feat_finalise_settings(dp
))
247 return inet_csk_listen_start(sk
, backlog
);
250 static inline int dccp_need_reset(int state
)
252 return state
!= DCCP_CLOSED
&& state
!= DCCP_LISTEN
&&
253 state
!= DCCP_REQUESTING
;
256 int dccp_disconnect(struct sock
*sk
, int flags
)
258 struct inet_connection_sock
*icsk
= inet_csk(sk
);
259 struct inet_sock
*inet
= inet_sk(sk
);
261 const int old_state
= sk
->sk_state
;
263 if (old_state
!= DCCP_CLOSED
)
264 dccp_set_state(sk
, DCCP_CLOSED
);
267 * This corresponds to the ABORT function of RFC793, sec. 3.8
268 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
270 if (old_state
== DCCP_LISTEN
) {
271 inet_csk_listen_stop(sk
);
272 } else if (dccp_need_reset(old_state
)) {
273 dccp_send_reset(sk
, DCCP_RESET_CODE_ABORTED
);
274 sk
->sk_err
= ECONNRESET
;
275 } else if (old_state
== DCCP_REQUESTING
)
276 sk
->sk_err
= ECONNRESET
;
278 dccp_clear_xmit_timers(sk
);
280 __skb_queue_purge(&sk
->sk_receive_queue
);
281 __skb_queue_purge(&sk
->sk_write_queue
);
282 if (sk
->sk_send_head
!= NULL
) {
283 __kfree_skb(sk
->sk_send_head
);
284 sk
->sk_send_head
= NULL
;
289 if (!(sk
->sk_userlocks
& SOCK_BINDADDR_LOCK
))
290 inet_reset_saddr(sk
);
293 sock_reset_flag(sk
, SOCK_DONE
);
295 icsk
->icsk_backoff
= 0;
296 inet_csk_delack_init(sk
);
299 WARN_ON(inet
->num
&& !icsk
->icsk_bind_hash
);
301 sk
->sk_error_report(sk
);
305 EXPORT_SYMBOL_GPL(dccp_disconnect
);
308 * Wait for a DCCP event.
310 * Note that we don't need to lock the socket, as the upper poll layers
311 * take care of normal races (between the test and the event) and we don't
312 * go look at any of the socket buffers directly.
314 unsigned int dccp_poll(struct file
*file
, struct socket
*sock
,
318 struct sock
*sk
= sock
->sk
;
320 poll_wait(file
, sk
->sk_sleep
, wait
);
321 if (sk
->sk_state
== DCCP_LISTEN
)
322 return inet_csk_listen_poll(sk
);
324 /* Socket is not locked. We are protected from async events
325 by poll logic and correct handling of state changes
326 made by other threads is impossible in any case.
333 if (sk
->sk_shutdown
== SHUTDOWN_MASK
|| sk
->sk_state
== DCCP_CLOSED
)
335 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
336 mask
|= POLLIN
| POLLRDNORM
| POLLRDHUP
;
339 if ((1 << sk
->sk_state
) & ~(DCCPF_REQUESTING
| DCCPF_RESPOND
)) {
340 if (atomic_read(&sk
->sk_rmem_alloc
) > 0)
341 mask
|= POLLIN
| POLLRDNORM
;
343 if (!(sk
->sk_shutdown
& SEND_SHUTDOWN
)) {
344 if (sk_stream_wspace(sk
) >= sk_stream_min_wspace(sk
)) {
345 mask
|= POLLOUT
| POLLWRNORM
;
346 } else { /* send SIGIO later */
347 set_bit(SOCK_ASYNC_NOSPACE
,
348 &sk
->sk_socket
->flags
);
349 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
351 /* Race breaker. If space is freed after
352 * wspace test but before the flags are set,
353 * IO signal will be lost.
355 if (sk_stream_wspace(sk
) >= sk_stream_min_wspace(sk
))
356 mask
|= POLLOUT
| POLLWRNORM
;
363 EXPORT_SYMBOL_GPL(dccp_poll
);
365 int dccp_ioctl(struct sock
*sk
, int cmd
, unsigned long arg
)
371 if (sk
->sk_state
== DCCP_LISTEN
)
377 unsigned long amount
= 0;
379 skb
= skb_peek(&sk
->sk_receive_queue
);
382 * We will only return the amount of this packet since
383 * that is all that will be read.
387 rc
= put_user(amount
, (int __user
*)arg
);
399 EXPORT_SYMBOL_GPL(dccp_ioctl
);
401 static int dccp_setsockopt_service(struct sock
*sk
, const __be32 service
,
402 char __user
*optval
, int optlen
)
404 struct dccp_sock
*dp
= dccp_sk(sk
);
405 struct dccp_service_list
*sl
= NULL
;
407 if (service
== DCCP_SERVICE_INVALID_VALUE
||
408 optlen
> DCCP_SERVICE_LIST_MAX_LEN
* sizeof(u32
))
411 if (optlen
> sizeof(service
)) {
412 sl
= kmalloc(optlen
, GFP_KERNEL
);
416 sl
->dccpsl_nr
= optlen
/ sizeof(u32
) - 1;
417 if (copy_from_user(sl
->dccpsl_list
,
418 optval
+ sizeof(service
),
419 optlen
- sizeof(service
)) ||
420 dccp_list_has_service(sl
, DCCP_SERVICE_INVALID_VALUE
)) {
427 dp
->dccps_service
= service
;
429 kfree(dp
->dccps_service_list
);
431 dp
->dccps_service_list
= sl
;
436 static int dccp_setsockopt_cscov(struct sock
*sk
, int cscov
, bool rx
)
441 if (cscov
< 0 || cscov
> 15)
444 * Populate a list of permissible values, in the range cscov...15. This
445 * is necessary since feature negotiation of single values only works if
446 * both sides incidentally choose the same value. Since the list starts
447 * lowest-value first, negotiation will pick the smallest shared value.
453 list
= kmalloc(len
, GFP_KERNEL
);
457 for (i
= 0; i
< len
; i
++)
460 rc
= dccp_feat_register_sp(sk
, DCCPF_MIN_CSUM_COVER
, rx
, list
, len
);
464 dccp_sk(sk
)->dccps_pcrlen
= cscov
;
466 dccp_sk(sk
)->dccps_pcslen
= cscov
;
472 static int dccp_setsockopt_ccid(struct sock
*sk
, int type
,
473 char __user
*optval
, int optlen
)
478 if (optlen
< 1 || optlen
> DCCP_FEAT_MAX_SP_VALS
)
481 val
= kmalloc(optlen
, GFP_KERNEL
);
485 if (copy_from_user(val
, optval
, optlen
)) {
491 if (type
== DCCP_SOCKOPT_TX_CCID
|| type
== DCCP_SOCKOPT_CCID
)
492 rc
= dccp_feat_register_sp(sk
, DCCPF_CCID
, 1, val
, optlen
);
494 if (!rc
&& (type
== DCCP_SOCKOPT_RX_CCID
|| type
== DCCP_SOCKOPT_CCID
))
495 rc
= dccp_feat_register_sp(sk
, DCCPF_CCID
, 0, val
, optlen
);
502 static int do_dccp_setsockopt(struct sock
*sk
, int level
, int optname
,
503 char __user
*optval
, int optlen
)
505 struct dccp_sock
*dp
= dccp_sk(sk
);
509 case DCCP_SOCKOPT_PACKET_SIZE
:
510 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
512 case DCCP_SOCKOPT_CHANGE_L
:
513 case DCCP_SOCKOPT_CHANGE_R
:
514 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
516 case DCCP_SOCKOPT_CCID
:
517 case DCCP_SOCKOPT_RX_CCID
:
518 case DCCP_SOCKOPT_TX_CCID
:
519 return dccp_setsockopt_ccid(sk
, optname
, optval
, optlen
);
522 if (optlen
< (int)sizeof(int))
525 if (get_user(val
, (int __user
*)optval
))
528 if (optname
== DCCP_SOCKOPT_SERVICE
)
529 return dccp_setsockopt_service(sk
, val
, optval
, optlen
);
533 case DCCP_SOCKOPT_SERVER_TIMEWAIT
:
534 if (dp
->dccps_role
!= DCCP_ROLE_SERVER
)
537 dp
->dccps_server_timewait
= (val
!= 0);
539 case DCCP_SOCKOPT_SEND_CSCOV
:
540 err
= dccp_setsockopt_cscov(sk
, val
, false);
542 case DCCP_SOCKOPT_RECV_CSCOV
:
543 err
= dccp_setsockopt_cscov(sk
, val
, true);
545 case DCCP_SOCKOPT_QPOLICY_ID
:
546 if (sk
->sk_state
!= DCCP_CLOSED
)
548 else if (val
< 0 || val
>= DCCPQ_POLICY_MAX
)
551 dp
->dccps_qpolicy
= val
;
553 case DCCP_SOCKOPT_QPOLICY_TXQLEN
:
557 dp
->dccps_tx_qlen
= val
;
568 int dccp_setsockopt(struct sock
*sk
, int level
, int optname
,
569 char __user
*optval
, int optlen
)
571 if (level
!= SOL_DCCP
)
572 return inet_csk(sk
)->icsk_af_ops
->setsockopt(sk
, level
,
575 return do_dccp_setsockopt(sk
, level
, optname
, optval
, optlen
);
578 EXPORT_SYMBOL_GPL(dccp_setsockopt
);
581 int compat_dccp_setsockopt(struct sock
*sk
, int level
, int optname
,
582 char __user
*optval
, int optlen
)
584 if (level
!= SOL_DCCP
)
585 return inet_csk_compat_setsockopt(sk
, level
, optname
,
587 return do_dccp_setsockopt(sk
, level
, optname
, optval
, optlen
);
590 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt
);
593 static int dccp_getsockopt_service(struct sock
*sk
, int len
,
594 __be32 __user
*optval
,
597 const struct dccp_sock
*dp
= dccp_sk(sk
);
598 const struct dccp_service_list
*sl
;
599 int err
= -ENOENT
, slen
= 0, total_len
= sizeof(u32
);
602 if ((sl
= dp
->dccps_service_list
) != NULL
) {
603 slen
= sl
->dccpsl_nr
* sizeof(u32
);
612 if (put_user(total_len
, optlen
) ||
613 put_user(dp
->dccps_service
, optval
) ||
614 (sl
!= NULL
&& copy_to_user(optval
+ 1, sl
->dccpsl_list
, slen
)))
621 static int do_dccp_getsockopt(struct sock
*sk
, int level
, int optname
,
622 char __user
*optval
, int __user
*optlen
)
624 struct dccp_sock
*dp
;
627 if (get_user(len
, optlen
))
630 if (len
< (int)sizeof(int))
636 case DCCP_SOCKOPT_PACKET_SIZE
:
637 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
639 case DCCP_SOCKOPT_SERVICE
:
640 return dccp_getsockopt_service(sk
, len
,
641 (__be32 __user
*)optval
, optlen
);
642 case DCCP_SOCKOPT_GET_CUR_MPS
:
643 val
= dp
->dccps_mss_cache
;
645 case DCCP_SOCKOPT_AVAILABLE_CCIDS
:
646 return ccid_getsockopt_builtin_ccids(sk
, len
, optval
, optlen
);
647 case DCCP_SOCKOPT_TX_CCID
:
648 val
= ccid_get_current_tx_ccid(dp
);
652 case DCCP_SOCKOPT_RX_CCID
:
653 val
= ccid_get_current_rx_ccid(dp
);
657 case DCCP_SOCKOPT_SERVER_TIMEWAIT
:
658 val
= dp
->dccps_server_timewait
;
660 case DCCP_SOCKOPT_SEND_CSCOV
:
661 val
= dp
->dccps_pcslen
;
663 case DCCP_SOCKOPT_RECV_CSCOV
:
664 val
= dp
->dccps_pcrlen
;
666 case DCCP_SOCKOPT_QPOLICY_ID
:
667 val
= dp
->dccps_qpolicy
;
669 case DCCP_SOCKOPT_QPOLICY_TXQLEN
:
670 val
= dp
->dccps_tx_qlen
;
673 return ccid_hc_rx_getsockopt(dp
->dccps_hc_rx_ccid
, sk
, optname
,
674 len
, (u32 __user
*)optval
, optlen
);
676 return ccid_hc_tx_getsockopt(dp
->dccps_hc_tx_ccid
, sk
, optname
,
677 len
, (u32 __user
*)optval
, optlen
);
683 if (put_user(len
, optlen
) || copy_to_user(optval
, &val
, len
))
689 int dccp_getsockopt(struct sock
*sk
, int level
, int optname
,
690 char __user
*optval
, int __user
*optlen
)
692 if (level
!= SOL_DCCP
)
693 return inet_csk(sk
)->icsk_af_ops
->getsockopt(sk
, level
,
696 return do_dccp_getsockopt(sk
, level
, optname
, optval
, optlen
);
699 EXPORT_SYMBOL_GPL(dccp_getsockopt
);
702 int compat_dccp_getsockopt(struct sock
*sk
, int level
, int optname
,
703 char __user
*optval
, int __user
*optlen
)
705 if (level
!= SOL_DCCP
)
706 return inet_csk_compat_getsockopt(sk
, level
, optname
,
708 return do_dccp_getsockopt(sk
, level
, optname
, optval
, optlen
);
711 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt
);
714 static int dccp_msghdr_parse(struct msghdr
*msg
, struct sk_buff
*skb
)
716 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR(msg
);
719 * Assign an (opaque) qpolicy priority value to skb->priority.
721 * We are overloading this skb field for use with the qpolicy subsystem.
722 * The skb->priority is normally used for the SO_PRIORITY option, which
723 * is initialised from sk_priority. Since the assignment of sk_priority
724 * to skb->priority happens later (on layer 3), we overload this field
725 * for use with queueing priorities as long as the skb is on layer 4.
726 * The default priority value (if nothing is set) is 0.
730 for (; cmsg
!= NULL
; cmsg
= CMSG_NXTHDR(msg
, cmsg
)) {
732 if (!CMSG_OK(msg
, cmsg
))
735 if (cmsg
->cmsg_level
!= SOL_DCCP
)
738 if (cmsg
->cmsg_type
<= DCCP_SCM_QPOLICY_MAX
&&
739 !dccp_qpolicy_param_ok(skb
->sk
, cmsg
->cmsg_type
))
742 switch (cmsg
->cmsg_type
) {
743 case DCCP_SCM_PRIORITY
:
744 if (cmsg
->cmsg_len
!= CMSG_LEN(sizeof(__u32
)))
746 skb
->priority
= *(__u32
*)CMSG_DATA(cmsg
);
755 int dccp_sendmsg(struct kiocb
*iocb
, struct sock
*sk
, struct msghdr
*msg
,
758 const struct dccp_sock
*dp
= dccp_sk(sk
);
759 const int flags
= msg
->msg_flags
;
760 const int noblock
= flags
& MSG_DONTWAIT
;
765 if (len
> dp
->dccps_mss_cache
)
770 if (dccp_qpolicy_full(sk
)) {
775 timeo
= sock_sndtimeo(sk
, noblock
);
778 * We have to use sk_stream_wait_connect here to set sk_write_pending,
779 * so that the trick in dccp_rcv_request_sent_state_process works.
781 /* Wait for a connection to finish. */
782 if ((1 << sk
->sk_state
) & ~(DCCPF_OPEN
| DCCPF_PARTOPEN
))
783 if ((rc
= sk_stream_wait_connect(sk
, &timeo
)) != 0)
786 size
= sk
->sk_prot
->max_header
+ len
;
788 skb
= sock_alloc_send_skb(sk
, size
, noblock
, &rc
);
793 skb_reserve(skb
, sk
->sk_prot
->max_header
);
794 rc
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
798 rc
= dccp_msghdr_parse(msg
, skb
);
802 dccp_qpolicy_push(sk
, skb
);
812 EXPORT_SYMBOL_GPL(dccp_sendmsg
);
814 int dccp_recvmsg(struct kiocb
*iocb
, struct sock
*sk
, struct msghdr
*msg
,
815 size_t len
, int nonblock
, int flags
, int *addr_len
)
817 const struct dccp_hdr
*dh
;
822 if (sk
->sk_state
== DCCP_LISTEN
) {
827 timeo
= sock_rcvtimeo(sk
, nonblock
);
830 struct sk_buff
*skb
= skb_peek(&sk
->sk_receive_queue
);
833 goto verify_sock_status
;
837 switch (dh
->dccph_type
) {
839 case DCCP_PKT_DATAACK
:
843 case DCCP_PKT_CLOSEREQ
:
844 if (!(flags
& MSG_PEEK
))
845 dccp_finish_passive_close(sk
);
848 dccp_pr_debug("found fin (%s) ok!\n",
849 dccp_packet_name(dh
->dccph_type
));
853 dccp_pr_debug("packet_type=%s\n",
854 dccp_packet_name(dh
->dccph_type
));
855 sk_eat_skb(sk
, skb
, 0);
858 if (sock_flag(sk
, SOCK_DONE
)) {
864 len
= sock_error(sk
);
868 if (sk
->sk_shutdown
& RCV_SHUTDOWN
) {
873 if (sk
->sk_state
== DCCP_CLOSED
) {
874 if (!sock_flag(sk
, SOCK_DONE
)) {
875 /* This occurs when user tries to read
876 * from never connected socket.
890 if (signal_pending(current
)) {
891 len
= sock_intr_errno(timeo
);
895 sk_wait_data(sk
, &timeo
);
900 else if (len
< skb
->len
)
901 msg
->msg_flags
|= MSG_TRUNC
;
903 if (skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, len
)) {
904 /* Exception. Bailout! */
909 if (!(flags
& MSG_PEEK
))
910 sk_eat_skb(sk
, skb
, 0);
918 EXPORT_SYMBOL_GPL(dccp_recvmsg
);
920 int inet_dccp_listen(struct socket
*sock
, int backlog
)
922 struct sock
*sk
= sock
->sk
;
923 unsigned char old_state
;
929 if (sock
->state
!= SS_UNCONNECTED
|| sock
->type
!= SOCK_DCCP
)
932 old_state
= sk
->sk_state
;
933 if (!((1 << old_state
) & (DCCPF_CLOSED
| DCCPF_LISTEN
)))
936 /* Really, if the socket is already in listen state
937 * we can only allow the backlog to be adjusted.
939 if (old_state
!= DCCP_LISTEN
) {
941 * FIXME: here it probably should be sk->sk_prot->listen_start
942 * see tcp_listen_start
944 err
= dccp_listen_start(sk
, backlog
);
948 sk
->sk_max_ack_backlog
= backlog
;
956 EXPORT_SYMBOL_GPL(inet_dccp_listen
);
958 static void dccp_terminate_connection(struct sock
*sk
)
960 u8 next_state
= DCCP_CLOSED
;
962 switch (sk
->sk_state
) {
963 case DCCP_PASSIVE_CLOSE
:
964 case DCCP_PASSIVE_CLOSEREQ
:
965 dccp_finish_passive_close(sk
);
968 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk
);
969 inet_csk_clear_xmit_timer(sk
, ICSK_TIME_DACK
);
972 dccp_send_close(sk
, 1);
974 if (dccp_sk(sk
)->dccps_role
== DCCP_ROLE_SERVER
&&
975 !dccp_sk(sk
)->dccps_server_timewait
)
976 next_state
= DCCP_ACTIVE_CLOSEREQ
;
978 next_state
= DCCP_CLOSING
;
981 dccp_set_state(sk
, next_state
);
985 void dccp_close(struct sock
*sk
, long timeout
)
987 struct dccp_sock
*dp
= dccp_sk(sk
);
989 u32 data_was_unread
= 0;
994 sk
->sk_shutdown
= SHUTDOWN_MASK
;
996 if (sk
->sk_state
== DCCP_LISTEN
) {
997 dccp_set_state(sk
, DCCP_CLOSED
);
1000 inet_csk_listen_stop(sk
);
1002 goto adjudge_to_death
;
1005 sk_stop_timer(sk
, &dp
->dccps_xmit_timer
);
1008 * We need to flush the recv. buffs. We do this only on the
1009 * descriptor close, not protocol-sourced closes, because the
1010 * reader process may not have drained the data yet!
1012 while ((skb
= __skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
1013 data_was_unread
+= skb
->len
;
1017 if (data_was_unread
) {
1018 /* Unread data was tossed, send an appropriate Reset Code */
1019 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread
);
1020 dccp_send_reset(sk
, DCCP_RESET_CODE_ABORTED
);
1021 dccp_set_state(sk
, DCCP_CLOSED
);
1022 } else if (sock_flag(sk
, SOCK_LINGER
) && !sk
->sk_lingertime
) {
1023 /* Check zero linger _after_ checking for unread data. */
1024 sk
->sk_prot
->disconnect(sk
, 0);
1025 } else if (sk
->sk_state
!= DCCP_CLOSED
) {
1027 * Normal connection termination. May need to wait if there are
1028 * still packets in the TX queue that are delayed by the CCID.
1030 dccp_flush_write_queue(sk
, &timeout
);
1031 dccp_terminate_connection(sk
);
1035 * Flush write queue. This may be necessary in several cases:
1036 * - we have been closed by the peer but still have application data;
1037 * - abortive termination (unread data or zero linger time),
1038 * - normal termination but queue could not be flushed within time limit
1040 __skb_queue_purge(&sk
->sk_write_queue
);
1042 sk_stream_wait_close(sk
, timeout
);
1045 state
= sk
->sk_state
;
1048 atomic_inc(sk
->sk_prot
->orphan_count
);
1051 * It is the last release_sock in its life. It will remove backlog.
1055 * Now socket is owned by kernel and we acquire BH lock
1056 * to finish close. No need to check for user refs.
1060 WARN_ON(sock_owned_by_user(sk
));
1062 /* Have we already been destroyed by a softirq or backlog? */
1063 if (state
!= DCCP_CLOSED
&& sk
->sk_state
== DCCP_CLOSED
)
1066 if (sk
->sk_state
== DCCP_CLOSED
)
1067 inet_csk_destroy_sock(sk
);
1069 /* Otherwise, socket is reprieved until protocol close. */
1077 EXPORT_SYMBOL_GPL(dccp_close
);
/*
 * Protocol shutdown hook. It only emits a debug message recording the
 * requested @how value; @sk is not used.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1084 EXPORT_SYMBOL_GPL(dccp_shutdown
);
1086 static inline int dccp_mib_init(void)
1088 return snmp_mib_init((void**)dccp_statistics
, sizeof(struct dccp_mib
));
1091 static inline void dccp_mib_exit(void)
1093 snmp_mib_free((void**)dccp_statistics
);
1096 static int thash_entries
;
1097 module_param(thash_entries
, int, 0444);
1098 MODULE_PARM_DESC(thash_entries
, "Number of ehash buckets");
1100 #ifdef CONFIG_IP_DCCP_DEBUG
1102 module_param(dccp_debug
, bool, 0644);
1103 MODULE_PARM_DESC(dccp_debug
, "Enable debug messages");
1105 EXPORT_SYMBOL_GPL(dccp_debug
);
1108 static int __init
dccp_init(void)
1111 int ehash_order
, bhash_order
, i
;
1114 BUILD_BUG_ON(sizeof(struct dccp_skb_cb
) >
1115 FIELD_SIZEOF(struct sk_buff
, cb
));
1117 dccp_hashinfo
.bind_bucket_cachep
=
1118 kmem_cache_create("dccp_bind_bucket",
1119 sizeof(struct inet_bind_bucket
), 0,
1120 SLAB_HWCACHE_ALIGN
, NULL
);
1121 if (!dccp_hashinfo
.bind_bucket_cachep
)
1125 * Size and allocate the main established and bind bucket
1128 * The methodology is similar to that of the buffer cache.
1130 if (num_physpages
>= (128 * 1024))
1131 goal
= num_physpages
>> (21 - PAGE_SHIFT
);
1133 goal
= num_physpages
>> (23 - PAGE_SHIFT
);
1136 goal
= (thash_entries
*
1137 sizeof(struct inet_ehash_bucket
)) >> PAGE_SHIFT
;
1138 for (ehash_order
= 0; (1UL << ehash_order
) < goal
; ehash_order
++)
1141 dccp_hashinfo
.ehash_size
= (1UL << ehash_order
) * PAGE_SIZE
/
1142 sizeof(struct inet_ehash_bucket
);
1143 while (dccp_hashinfo
.ehash_size
&
1144 (dccp_hashinfo
.ehash_size
- 1))
1145 dccp_hashinfo
.ehash_size
--;
1146 dccp_hashinfo
.ehash
= (struct inet_ehash_bucket
*)
1147 __get_free_pages(GFP_ATOMIC
, ehash_order
);
1148 } while (!dccp_hashinfo
.ehash
&& --ehash_order
> 0);
1150 if (!dccp_hashinfo
.ehash
) {
1151 DCCP_CRIT("Failed to allocate DCCP established hash table");
1152 goto out_free_bind_bucket_cachep
;
1155 for (i
= 0; i
< dccp_hashinfo
.ehash_size
; i
++) {
1156 INIT_HLIST_HEAD(&dccp_hashinfo
.ehash
[i
].chain
);
1157 INIT_HLIST_HEAD(&dccp_hashinfo
.ehash
[i
].twchain
);
1160 if (inet_ehash_locks_alloc(&dccp_hashinfo
))
1161 goto out_free_dccp_ehash
;
1163 bhash_order
= ehash_order
;
1166 dccp_hashinfo
.bhash_size
= (1UL << bhash_order
) * PAGE_SIZE
/
1167 sizeof(struct inet_bind_hashbucket
);
1168 if ((dccp_hashinfo
.bhash_size
> (64 * 1024)) &&
1171 dccp_hashinfo
.bhash
= (struct inet_bind_hashbucket
*)
1172 __get_free_pages(GFP_ATOMIC
, bhash_order
);
1173 } while (!dccp_hashinfo
.bhash
&& --bhash_order
>= 0);
1175 if (!dccp_hashinfo
.bhash
) {
1176 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1177 goto out_free_dccp_locks
;
1180 for (i
= 0; i
< dccp_hashinfo
.bhash_size
; i
++) {
1181 spin_lock_init(&dccp_hashinfo
.bhash
[i
].lock
);
1182 INIT_HLIST_HEAD(&dccp_hashinfo
.bhash
[i
].chain
);
1185 rc
= dccp_mib_init();
1187 goto out_free_dccp_bhash
;
1189 rc
= dccp_ackvec_init();
1191 goto out_free_dccp_mib
;
1193 rc
= dccp_sysctl_init();
1195 goto out_ackvec_exit
;
1197 dccp_timestamping_init();
1204 out_free_dccp_bhash
:
1205 free_pages((unsigned long)dccp_hashinfo
.bhash
, bhash_order
);
1206 dccp_hashinfo
.bhash
= NULL
;
1207 out_free_dccp_locks
:
1208 inet_ehash_locks_free(&dccp_hashinfo
);
1209 out_free_dccp_ehash
:
1210 free_pages((unsigned long)dccp_hashinfo
.ehash
, ehash_order
);
1211 dccp_hashinfo
.ehash
= NULL
;
1212 out_free_bind_bucket_cachep
:
1213 kmem_cache_destroy(dccp_hashinfo
.bind_bucket_cachep
);
1214 dccp_hashinfo
.bind_bucket_cachep
= NULL
;
1218 static void __exit
dccp_fini(void)
1221 free_pages((unsigned long)dccp_hashinfo
.bhash
,
1222 get_order(dccp_hashinfo
.bhash_size
*
1223 sizeof(struct inet_bind_hashbucket
)));
1224 free_pages((unsigned long)dccp_hashinfo
.ehash
,
1225 get_order(dccp_hashinfo
.ehash_size
*
1226 sizeof(struct inet_ehash_bucket
)));
1227 inet_ehash_locks_free(&dccp_hashinfo
);
1228 kmem_cache_destroy(dccp_hashinfo
.bind_bucket_cachep
);
1233 module_init(dccp_init
);
1234 module_exit(dccp_fini
);
1236 MODULE_LICENSE("GPL");
1237 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1238 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");