Committer: Michael Beasley <mike@snafu.setup>
[mikesnafu-overlay.git] / net / dccp / proto.c
blobc91d3c1fd30d4239018dd23c1c132680ab067435
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42 EXPORT_SYMBOL_GPL(dccp_statistics);
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49 .lhash_lock = RW_LOCK_UNLOCKED,
50 .lhash_users = ATOMIC_INIT(0),
51 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
56 /* the maximum queue length for tx in packets. 0 is no limit */
57 int sysctl_dccp_tx_qlen __read_mostly = 5;
59 void dccp_set_state(struct sock *sk, const int state)
61 const int oldstate = sk->sk_state;
63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
64 dccp_state_name(oldstate), dccp_state_name(state));
65 WARN_ON(state == oldstate);
67 switch (state) {
68 case DCCP_OPEN:
69 if (oldstate != DCCP_OPEN)
70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71 break;
73 case DCCP_CLOSED:
74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
78 sk->sk_prot->unhash(sk);
79 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81 inet_put_port(sk);
82 /* fall through */
83 default:
84 if (oldstate == DCCP_OPEN)
85 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
88 /* Change state AFTER socket is unhashed to avoid closed
89 * socket sitting in hash tables.
91 sk->sk_state = state;
94 EXPORT_SYMBOL_GPL(dccp_set_state);
96 static void dccp_finish_passive_close(struct sock *sk)
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
103 break;
104 case DCCP_PASSIVE_CLOSEREQ:
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
114 void dccp_done(struct sock *sk)
116 dccp_set_state(sk, DCCP_CLOSED);
117 dccp_clear_xmit_timers(sk);
119 sk->sk_shutdown = SHUTDOWN_MASK;
121 if (!sock_flag(sk, SOCK_DEAD))
122 sk->sk_state_change(sk);
123 else
124 inet_csk_destroy_sock(sk);
127 EXPORT_SYMBOL_GPL(dccp_done);
129 const char *dccp_packet_name(const int type)
131 static const char *dccp_packet_names[] = {
132 [DCCP_PKT_REQUEST] = "REQUEST",
133 [DCCP_PKT_RESPONSE] = "RESPONSE",
134 [DCCP_PKT_DATA] = "DATA",
135 [DCCP_PKT_ACK] = "ACK",
136 [DCCP_PKT_DATAACK] = "DATAACK",
137 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138 [DCCP_PKT_CLOSE] = "CLOSE",
139 [DCCP_PKT_RESET] = "RESET",
140 [DCCP_PKT_SYNC] = "SYNC",
141 [DCCP_PKT_SYNCACK] = "SYNCACK",
144 if (type >= DCCP_NR_PKT_TYPES)
145 return "INVALID";
146 else
147 return dccp_packet_names[type];
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
152 const char *dccp_state_name(const int state)
154 static char *dccp_state_names[] = {
155 [DCCP_OPEN] = "OPEN",
156 [DCCP_REQUESTING] = "REQUESTING",
157 [DCCP_PARTOPEN] = "PARTOPEN",
158 [DCCP_LISTEN] = "LISTEN",
159 [DCCP_RESPOND] = "RESPOND",
160 [DCCP_CLOSING] = "CLOSING",
161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
168 if (state >= DCCP_MAX_STATES)
169 return "INVALID STATE!";
170 else
171 return dccp_state_names[state];
174 EXPORT_SYMBOL_GPL(dccp_state_name);
176 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
178 struct dccp_sock *dp = dccp_sk(sk);
179 struct dccp_minisock *dmsk = dccp_msk(sk);
180 struct inet_connection_sock *icsk = inet_csk(sk);
182 dccp_minisock_init(&dp->dccps_minisock);
184 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
185 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
186 sk->sk_state = DCCP_CLOSED;
187 sk->sk_write_space = dccp_write_space;
188 icsk->icsk_sync_mss = dccp_sync_mss;
189 dp->dccps_mss_cache = 536;
190 dp->dccps_rate_last = jiffies;
191 dp->dccps_role = DCCP_ROLE_UNDEFINED;
192 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
193 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
195 dccp_init_xmit_timers(sk);
198 * FIXME: We're hardcoding the CCID, and doing this at this point makes
199 * the listening (master) sock get CCID control blocks, which is not
200 * necessary, but for now, to not mess with the test userspace apps,
201 * lets leave it here, later the real solution is to do this in a
202 * setsockopt(CCIDs-I-want/accept). -acme
204 if (likely(ctl_sock_initialized)) {
205 int rc = dccp_feat_init(dmsk);
207 if (rc)
208 return rc;
210 if (dmsk->dccpms_send_ack_vector) {
211 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212 if (dp->dccps_hc_rx_ackvec == NULL)
213 return -ENOMEM;
215 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
216 sk, GFP_KERNEL);
217 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
218 sk, GFP_KERNEL);
219 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220 dp->dccps_hc_tx_ccid == NULL)) {
221 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223 if (dmsk->dccpms_send_ack_vector) {
224 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225 dp->dccps_hc_rx_ackvec = NULL;
227 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
228 return -ENOMEM;
230 } else {
231 /* control socket doesn't need feat nego */
232 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233 INIT_LIST_HEAD(&dmsk->dccpms_conf);
236 return 0;
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
241 int dccp_destroy_sock(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk);
244 struct dccp_minisock *dmsk = dccp_msk(sk);
247 * DCCP doesn't use sk_write_queue, just sk_send_head
248 * for retransmissions
250 if (sk->sk_send_head != NULL) {
251 kfree_skb(sk->sk_send_head);
252 sk->sk_send_head = NULL;
255 /* Clean up a referenced DCCP bind bucket. */
256 if (inet_csk(sk)->icsk_bind_hash != NULL)
257 inet_put_port(sk);
259 kfree(dp->dccps_service_list);
260 dp->dccps_service_list = NULL;
262 if (dmsk->dccpms_send_ack_vector) {
263 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264 dp->dccps_hc_rx_ackvec = NULL;
266 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
270 /* clean up feature negotiation state */
271 dccp_feat_clean(dmsk);
273 return 0;
276 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
278 static inline int dccp_listen_start(struct sock *sk, int backlog)
280 struct dccp_sock *dp = dccp_sk(sk);
282 dp->dccps_role = DCCP_ROLE_LISTEN;
283 return inet_csk_listen_start(sk, backlog);
286 static inline int dccp_need_reset(int state)
288 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
289 state != DCCP_REQUESTING;
292 int dccp_disconnect(struct sock *sk, int flags)
294 struct inet_connection_sock *icsk = inet_csk(sk);
295 struct inet_sock *inet = inet_sk(sk);
296 int err = 0;
297 const int old_state = sk->sk_state;
299 if (old_state != DCCP_CLOSED)
300 dccp_set_state(sk, DCCP_CLOSED);
303 * This corresponds to the ABORT function of RFC793, sec. 3.8
304 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
306 if (old_state == DCCP_LISTEN) {
307 inet_csk_listen_stop(sk);
308 } else if (dccp_need_reset(old_state)) {
309 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
310 sk->sk_err = ECONNRESET;
311 } else if (old_state == DCCP_REQUESTING)
312 sk->sk_err = ECONNRESET;
314 dccp_clear_xmit_timers(sk);
315 __skb_queue_purge(&sk->sk_receive_queue);
316 if (sk->sk_send_head != NULL) {
317 __kfree_skb(sk->sk_send_head);
318 sk->sk_send_head = NULL;
321 inet->dport = 0;
323 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
324 inet_reset_saddr(sk);
326 sk->sk_shutdown = 0;
327 sock_reset_flag(sk, SOCK_DONE);
329 icsk->icsk_backoff = 0;
330 inet_csk_delack_init(sk);
331 __sk_dst_reset(sk);
333 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
335 sk->sk_error_report(sk);
336 return err;
339 EXPORT_SYMBOL_GPL(dccp_disconnect);
342 * Wait for a DCCP event.
344 * Note that we don't need to lock the socket, as the upper poll layers
345 * take care of normal races (between the test and the event) and we don't
346 * go look at any of the socket buffers directly.
348 unsigned int dccp_poll(struct file *file, struct socket *sock,
349 poll_table *wait)
351 unsigned int mask;
352 struct sock *sk = sock->sk;
354 poll_wait(file, sk->sk_sleep, wait);
355 if (sk->sk_state == DCCP_LISTEN)
356 return inet_csk_listen_poll(sk);
358 /* Socket is not locked. We are protected from async events
359 by poll logic and correct handling of state changes
360 made by another threads is impossible in any case.
363 mask = 0;
364 if (sk->sk_err)
365 mask = POLLERR;
367 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
368 mask |= POLLHUP;
369 if (sk->sk_shutdown & RCV_SHUTDOWN)
370 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
372 /* Connected? */
373 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
374 if (atomic_read(&sk->sk_rmem_alloc) > 0)
375 mask |= POLLIN | POLLRDNORM;
377 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
378 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
379 mask |= POLLOUT | POLLWRNORM;
380 } else { /* send SIGIO later */
381 set_bit(SOCK_ASYNC_NOSPACE,
382 &sk->sk_socket->flags);
383 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
385 /* Race breaker. If space is freed after
386 * wspace test but before the flags are set,
387 * IO signal will be lost.
389 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
390 mask |= POLLOUT | POLLWRNORM;
394 return mask;
397 EXPORT_SYMBOL_GPL(dccp_poll);
399 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
401 int rc = -ENOTCONN;
403 lock_sock(sk);
405 if (sk->sk_state == DCCP_LISTEN)
406 goto out;
408 switch (cmd) {
409 case SIOCINQ: {
410 struct sk_buff *skb;
411 unsigned long amount = 0;
413 skb = skb_peek(&sk->sk_receive_queue);
414 if (skb != NULL) {
416 * We will only return the amount of this packet since
417 * that is all that will be read.
419 amount = skb->len;
421 rc = put_user(amount, (int __user *)arg);
423 break;
424 default:
425 rc = -ENOIOCTLCMD;
426 break;
428 out:
429 release_sock(sk);
430 return rc;
433 EXPORT_SYMBOL_GPL(dccp_ioctl);
435 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
436 char __user *optval, int optlen)
438 struct dccp_sock *dp = dccp_sk(sk);
439 struct dccp_service_list *sl = NULL;
441 if (service == DCCP_SERVICE_INVALID_VALUE ||
442 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
443 return -EINVAL;
445 if (optlen > sizeof(service)) {
446 sl = kmalloc(optlen, GFP_KERNEL);
447 if (sl == NULL)
448 return -ENOMEM;
450 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
451 if (copy_from_user(sl->dccpsl_list,
452 optval + sizeof(service),
453 optlen - sizeof(service)) ||
454 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
455 kfree(sl);
456 return -EFAULT;
460 lock_sock(sk);
461 dp->dccps_service = service;
463 kfree(dp->dccps_service_list);
465 dp->dccps_service_list = sl;
466 release_sock(sk);
467 return 0;
470 /* byte 1 is feature. the rest is the preference list */
471 static int dccp_setsockopt_change(struct sock *sk, int type,
472 struct dccp_so_feat __user *optval)
474 struct dccp_so_feat opt;
475 u8 *val;
476 int rc;
478 if (copy_from_user(&opt, optval, sizeof(opt)))
479 return -EFAULT;
481 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
482 if (!val)
483 return -ENOMEM;
485 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
486 rc = -EFAULT;
487 goto out_free_val;
490 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
491 val, opt.dccpsf_len, GFP_KERNEL);
492 if (rc)
493 goto out_free_val;
495 out:
496 return rc;
498 out_free_val:
499 kfree(val);
500 goto out;
503 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
504 char __user *optval, int optlen)
506 struct dccp_sock *dp = dccp_sk(sk);
507 int val, err = 0;
509 if (optlen < sizeof(int))
510 return -EINVAL;
512 if (get_user(val, (int __user *)optval))
513 return -EFAULT;
515 if (optname == DCCP_SOCKOPT_SERVICE)
516 return dccp_setsockopt_service(sk, val, optval, optlen);
518 lock_sock(sk);
519 switch (optname) {
520 case DCCP_SOCKOPT_PACKET_SIZE:
521 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
522 err = 0;
523 break;
524 case DCCP_SOCKOPT_CHANGE_L:
525 if (optlen != sizeof(struct dccp_so_feat))
526 err = -EINVAL;
527 else
528 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
529 (struct dccp_so_feat __user *)
530 optval);
531 break;
532 case DCCP_SOCKOPT_CHANGE_R:
533 if (optlen != sizeof(struct dccp_so_feat))
534 err = -EINVAL;
535 else
536 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
537 (struct dccp_so_feat __user *)
538 optval);
539 break;
540 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
541 if (dp->dccps_role != DCCP_ROLE_SERVER)
542 err = -EOPNOTSUPP;
543 else
544 dp->dccps_server_timewait = (val != 0);
545 break;
546 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
547 if (val < 0 || val > 15)
548 err = -EINVAL;
549 else
550 dp->dccps_pcslen = val;
551 break;
552 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
553 if (val < 0 || val > 15)
554 err = -EINVAL;
555 else {
556 dp->dccps_pcrlen = val;
557 /* FIXME: add feature negotiation,
558 * ChangeL(MinimumChecksumCoverage, val) */
560 break;
561 default:
562 err = -ENOPROTOOPT;
563 break;
566 release_sock(sk);
567 return err;
570 int dccp_setsockopt(struct sock *sk, int level, int optname,
571 char __user *optval, int optlen)
573 if (level != SOL_DCCP)
574 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
575 optname, optval,
576 optlen);
577 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
580 EXPORT_SYMBOL_GPL(dccp_setsockopt);
#ifdef CONFIG_COMPAT
/*
 * Compat variant of dccp_setsockopt(): SOL_DCCP options take the same
 * path, other levels go to the compat inet handler.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
595 static int dccp_getsockopt_service(struct sock *sk, int len,
596 __be32 __user *optval,
597 int __user *optlen)
599 const struct dccp_sock *dp = dccp_sk(sk);
600 const struct dccp_service_list *sl;
601 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
603 lock_sock(sk);
604 if ((sl = dp->dccps_service_list) != NULL) {
605 slen = sl->dccpsl_nr * sizeof(u32);
606 total_len += slen;
609 err = -EINVAL;
610 if (total_len > len)
611 goto out;
613 err = 0;
614 if (put_user(total_len, optlen) ||
615 put_user(dp->dccps_service, optval) ||
616 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
617 err = -EFAULT;
618 out:
619 release_sock(sk);
620 return err;
623 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
624 char __user *optval, int __user *optlen)
626 struct dccp_sock *dp;
627 int val, len;
629 if (get_user(len, optlen))
630 return -EFAULT;
632 if (len < (int)sizeof(int))
633 return -EINVAL;
635 dp = dccp_sk(sk);
637 switch (optname) {
638 case DCCP_SOCKOPT_PACKET_SIZE:
639 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
640 return 0;
641 case DCCP_SOCKOPT_SERVICE:
642 return dccp_getsockopt_service(sk, len,
643 (__be32 __user *)optval, optlen);
644 case DCCP_SOCKOPT_GET_CUR_MPS:
645 val = dp->dccps_mss_cache;
646 break;
647 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
648 val = dp->dccps_server_timewait;
649 break;
650 case DCCP_SOCKOPT_SEND_CSCOV:
651 val = dp->dccps_pcslen;
652 break;
653 case DCCP_SOCKOPT_RECV_CSCOV:
654 val = dp->dccps_pcrlen;
655 break;
656 case 128 ... 191:
657 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
658 len, (u32 __user *)optval, optlen);
659 case 192 ... 255:
660 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
661 len, (u32 __user *)optval, optlen);
662 default:
663 return -ENOPROTOOPT;
666 len = sizeof(val);
667 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
668 return -EFAULT;
670 return 0;
673 int dccp_getsockopt(struct sock *sk, int level, int optname,
674 char __user *optval, int __user *optlen)
676 if (level != SOL_DCCP)
677 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
678 optname, optval,
679 optlen);
680 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
683 EXPORT_SYMBOL_GPL(dccp_getsockopt);
#ifdef CONFIG_COMPAT
/*
 * Compat variant of dccp_getsockopt(): SOL_DCCP options take the same
 * path, other levels go to the compat inet handler.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
698 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
699 size_t len)
701 const struct dccp_sock *dp = dccp_sk(sk);
702 const int flags = msg->msg_flags;
703 const int noblock = flags & MSG_DONTWAIT;
704 struct sk_buff *skb;
705 int rc, size;
706 long timeo;
708 if (len > dp->dccps_mss_cache)
709 return -EMSGSIZE;
711 lock_sock(sk);
713 if (sysctl_dccp_tx_qlen &&
714 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
715 rc = -EAGAIN;
716 goto out_release;
719 timeo = sock_sndtimeo(sk, noblock);
722 * We have to use sk_stream_wait_connect here to set sk_write_pending,
723 * so that the trick in dccp_rcv_request_sent_state_process.
725 /* Wait for a connection to finish. */
726 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
727 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
728 goto out_release;
730 size = sk->sk_prot->max_header + len;
731 release_sock(sk);
732 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
733 lock_sock(sk);
734 if (skb == NULL)
735 goto out_release;
737 skb_reserve(skb, sk->sk_prot->max_header);
738 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
739 if (rc != 0)
740 goto out_discard;
742 skb_queue_tail(&sk->sk_write_queue, skb);
743 dccp_write_xmit(sk,0);
744 out_release:
745 release_sock(sk);
746 return rc ? : len;
747 out_discard:
748 kfree_skb(skb);
749 goto out_release;
752 EXPORT_SYMBOL_GPL(dccp_sendmsg);
754 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
755 size_t len, int nonblock, int flags, int *addr_len)
757 const struct dccp_hdr *dh;
758 long timeo;
760 lock_sock(sk);
762 if (sk->sk_state == DCCP_LISTEN) {
763 len = -ENOTCONN;
764 goto out;
767 timeo = sock_rcvtimeo(sk, nonblock);
769 do {
770 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
772 if (skb == NULL)
773 goto verify_sock_status;
775 dh = dccp_hdr(skb);
777 switch (dh->dccph_type) {
778 case DCCP_PKT_DATA:
779 case DCCP_PKT_DATAACK:
780 goto found_ok_skb;
782 case DCCP_PKT_CLOSE:
783 case DCCP_PKT_CLOSEREQ:
784 if (!(flags & MSG_PEEK))
785 dccp_finish_passive_close(sk);
786 /* fall through */
787 case DCCP_PKT_RESET:
788 dccp_pr_debug("found fin (%s) ok!\n",
789 dccp_packet_name(dh->dccph_type));
790 len = 0;
791 goto found_fin_ok;
792 default:
793 dccp_pr_debug("packet_type=%s\n",
794 dccp_packet_name(dh->dccph_type));
795 sk_eat_skb(sk, skb, 0);
797 verify_sock_status:
798 if (sock_flag(sk, SOCK_DONE)) {
799 len = 0;
800 break;
803 if (sk->sk_err) {
804 len = sock_error(sk);
805 break;
808 if (sk->sk_shutdown & RCV_SHUTDOWN) {
809 len = 0;
810 break;
813 if (sk->sk_state == DCCP_CLOSED) {
814 if (!sock_flag(sk, SOCK_DONE)) {
815 /* This occurs when user tries to read
816 * from never connected socket.
818 len = -ENOTCONN;
819 break;
821 len = 0;
822 break;
825 if (!timeo) {
826 len = -EAGAIN;
827 break;
830 if (signal_pending(current)) {
831 len = sock_intr_errno(timeo);
832 break;
835 sk_wait_data(sk, &timeo);
836 continue;
837 found_ok_skb:
838 if (len > skb->len)
839 len = skb->len;
840 else if (len < skb->len)
841 msg->msg_flags |= MSG_TRUNC;
843 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
844 /* Exception. Bailout! */
845 len = -EFAULT;
846 break;
848 found_fin_ok:
849 if (!(flags & MSG_PEEK))
850 sk_eat_skb(sk, skb, 0);
851 break;
852 } while (1);
853 out:
854 release_sock(sk);
855 return len;
858 EXPORT_SYMBOL_GPL(dccp_recvmsg);
860 int inet_dccp_listen(struct socket *sock, int backlog)
862 struct sock *sk = sock->sk;
863 unsigned char old_state;
864 int err;
866 lock_sock(sk);
868 err = -EINVAL;
869 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
870 goto out;
872 old_state = sk->sk_state;
873 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
874 goto out;
876 /* Really, if the socket is already in listen state
877 * we can only allow the backlog to be adjusted.
879 if (old_state != DCCP_LISTEN) {
881 * FIXME: here it probably should be sk->sk_prot->listen_start
882 * see tcp_listen_start
884 err = dccp_listen_start(sk, backlog);
885 if (err)
886 goto out;
888 sk->sk_max_ack_backlog = backlog;
889 err = 0;
891 out:
892 release_sock(sk);
893 return err;
896 EXPORT_SYMBOL_GPL(inet_dccp_listen);
898 static void dccp_terminate_connection(struct sock *sk)
900 u8 next_state = DCCP_CLOSED;
902 switch (sk->sk_state) {
903 case DCCP_PASSIVE_CLOSE:
904 case DCCP_PASSIVE_CLOSEREQ:
905 dccp_finish_passive_close(sk);
906 break;
907 case DCCP_PARTOPEN:
908 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
909 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
910 /* fall through */
911 case DCCP_OPEN:
912 dccp_send_close(sk, 1);
914 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
915 !dccp_sk(sk)->dccps_server_timewait)
916 next_state = DCCP_ACTIVE_CLOSEREQ;
917 else
918 next_state = DCCP_CLOSING;
919 /* fall through */
920 default:
921 dccp_set_state(sk, next_state);
925 void dccp_close(struct sock *sk, long timeout)
927 struct dccp_sock *dp = dccp_sk(sk);
928 struct sk_buff *skb;
929 u32 data_was_unread = 0;
930 int state;
932 lock_sock(sk);
934 sk->sk_shutdown = SHUTDOWN_MASK;
936 if (sk->sk_state == DCCP_LISTEN) {
937 dccp_set_state(sk, DCCP_CLOSED);
939 /* Special case. */
940 inet_csk_listen_stop(sk);
942 goto adjudge_to_death;
945 sk_stop_timer(sk, &dp->dccps_xmit_timer);
948 * We need to flush the recv. buffs. We do this only on the
949 * descriptor close, not protocol-sourced closes, because the
950 *reader process may not have drained the data yet!
952 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
953 data_was_unread += skb->len;
954 __kfree_skb(skb);
957 if (data_was_unread) {
958 /* Unread data was tossed, send an appropriate Reset Code */
959 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
960 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
961 dccp_set_state(sk, DCCP_CLOSED);
962 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
963 /* Check zero linger _after_ checking for unread data. */
964 sk->sk_prot->disconnect(sk, 0);
965 } else if (sk->sk_state != DCCP_CLOSED) {
966 dccp_terminate_connection(sk);
969 sk_stream_wait_close(sk, timeout);
971 adjudge_to_death:
972 state = sk->sk_state;
973 sock_hold(sk);
974 sock_orphan(sk);
975 atomic_inc(sk->sk_prot->orphan_count);
978 * It is the last release_sock in its life. It will remove backlog.
980 release_sock(sk);
982 * Now socket is owned by kernel and we acquire BH lock
983 * to finish close. No need to check for user refs.
985 local_bh_disable();
986 bh_lock_sock(sk);
987 BUG_TRAP(!sock_owned_by_user(sk));
989 /* Have we already been destroyed by a softirq or backlog? */
990 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
991 goto out;
993 if (sk->sk_state == DCCP_CLOSED)
994 inet_csk_destroy_sock(sk);
996 /* Otherwise, socket is reprieved until protocol close. */
998 out:
999 bh_unlock_sock(sk);
1000 local_bh_enable();
1001 sock_put(sk);
1004 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown  -  shutdown() hook
 *
 * Only logs the request; it does not act on @how.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1013 static int __init dccp_mib_init(void)
1015 int rc = -ENOMEM;
1017 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1018 if (dccp_statistics[0] == NULL)
1019 goto out;
1021 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1022 if (dccp_statistics[1] == NULL)
1023 goto out_free_one;
1025 rc = 0;
1026 out:
1027 return rc;
1028 out_free_one:
1029 free_percpu(dccp_statistics[0]);
1030 dccp_statistics[0] = NULL;
1031 goto out;
1035 static void dccp_mib_exit(void)
1037 free_percpu(dccp_statistics[0]);
1038 free_percpu(dccp_statistics[1]);
1039 dccp_statistics[0] = dccp_statistics[1] = NULL;
1042 static int thash_entries;
1043 module_param(thash_entries, int, 0444);
1044 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1046 #ifdef CONFIG_IP_DCCP_DEBUG
1047 int dccp_debug;
1048 module_param(dccp_debug, bool, 0444);
1049 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1051 EXPORT_SYMBOL_GPL(dccp_debug);
1052 #endif
1054 static int __init dccp_init(void)
1056 unsigned long goal;
1057 int ehash_order, bhash_order, i;
1058 int rc = -ENOBUFS;
1060 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1061 FIELD_SIZEOF(struct sk_buff, cb));
1063 dccp_hashinfo.bind_bucket_cachep =
1064 kmem_cache_create("dccp_bind_bucket",
1065 sizeof(struct inet_bind_bucket), 0,
1066 SLAB_HWCACHE_ALIGN, NULL);
1067 if (!dccp_hashinfo.bind_bucket_cachep)
1068 goto out;
1071 * Size and allocate the main established and bind bucket
1072 * hash tables.
1074 * The methodology is similar to that of the buffer cache.
1076 if (num_physpages >= (128 * 1024))
1077 goal = num_physpages >> (21 - PAGE_SHIFT);
1078 else
1079 goal = num_physpages >> (23 - PAGE_SHIFT);
1081 if (thash_entries)
1082 goal = (thash_entries *
1083 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1084 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1086 do {
1087 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1088 sizeof(struct inet_ehash_bucket);
1089 while (dccp_hashinfo.ehash_size &
1090 (dccp_hashinfo.ehash_size - 1))
1091 dccp_hashinfo.ehash_size--;
1092 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1093 __get_free_pages(GFP_ATOMIC, ehash_order);
1094 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1096 if (!dccp_hashinfo.ehash) {
1097 DCCP_CRIT("Failed to allocate DCCP established hash table");
1098 goto out_free_bind_bucket_cachep;
1101 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1102 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1103 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1106 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1107 goto out_free_dccp_ehash;
1109 bhash_order = ehash_order;
1111 do {
1112 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1113 sizeof(struct inet_bind_hashbucket);
1114 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1115 bhash_order > 0)
1116 continue;
1117 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1118 __get_free_pages(GFP_ATOMIC, bhash_order);
1119 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1121 if (!dccp_hashinfo.bhash) {
1122 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1123 goto out_free_dccp_locks;
1126 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1127 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1128 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1131 rc = dccp_mib_init();
1132 if (rc)
1133 goto out_free_dccp_bhash;
1135 rc = dccp_ackvec_init();
1136 if (rc)
1137 goto out_free_dccp_mib;
1139 rc = dccp_sysctl_init();
1140 if (rc)
1141 goto out_ackvec_exit;
1143 dccp_timestamping_init();
1144 out:
1145 return rc;
1146 out_ackvec_exit:
1147 dccp_ackvec_exit();
1148 out_free_dccp_mib:
1149 dccp_mib_exit();
1150 out_free_dccp_bhash:
1151 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1152 dccp_hashinfo.bhash = NULL;
1153 out_free_dccp_locks:
1154 inet_ehash_locks_free(&dccp_hashinfo);
1155 out_free_dccp_ehash:
1156 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1157 dccp_hashinfo.ehash = NULL;
1158 out_free_bind_bucket_cachep:
1159 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1160 dccp_hashinfo.bind_bucket_cachep = NULL;
1161 goto out;
1164 static void __exit dccp_fini(void)
1166 dccp_mib_exit();
1167 free_pages((unsigned long)dccp_hashinfo.bhash,
1168 get_order(dccp_hashinfo.bhash_size *
1169 sizeof(struct inet_bind_hashbucket)));
1170 free_pages((unsigned long)dccp_hashinfo.ehash,
1171 get_order(dccp_hashinfo.ehash_size *
1172 sizeof(struct inet_ehash_bucket)));
1173 inet_ehash_locks_free(&dccp_hashinfo);
1174 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1175 dccp_ackvec_exit();
1176 dccp_sysctl_exit();
1179 module_init(dccp_init);
1180 module_exit(dccp_fini);
1182 MODULE_LICENSE("GPL");
1183 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1184 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");