/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);
const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);
const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
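
/*
 * Usage sketch (userspace, not part of this file): dccp_poll() is what backs
 * poll(2)/select(2) on a DCCP socket, so an application can wait for readable
 * packets or for write-queue space. This assumes a connected SOCK_DCCP
 * descriptor "fd"; headers and error handling are elided.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			handle_readable(fd);
 *		if (pfd.revents & POLLOUT)
 *			handle_writable(fd);
 *	}
 */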
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
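
/*
 * Usage sketch (userspace): as the SIOCINQ case above shows, the ioctl
 * reports only the length of the packet at the head of the receive queue,
 * not the total number of queued bytes, because one recvmsg() reads at most
 * one packet.
 *
 *	int pending = 0;
 *
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		printf("next packet carries %d bytes\n", pending);
 */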
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
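
/*
 * Usage sketch (userspace, hypothetical service codes): DCCP_SOCKOPT_SERVICE
 * takes one __be32 service code, optionally followed by further codes that a
 * listening socket will also accept, and has to be set before connect() or
 * listen().
 *
 *	const __be32 services[2] = { htonl(42), htonl(43) };
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   services, sizeof(services));
 */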
/* byte 1 is feature.  the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);
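
/*
 * Usage sketch (userspace): partial checksum coverage, RFC 4340 sec. 9.2.
 * A CsCov of 0 covers the whole packet; values 1..15 cover the header plus,
 * roughly, (value - 1) * 4 initial bytes of payload (see the RFC for the
 * exact mapping) - matching the 0..15 range validated above.
 *
 *	int cscov = 3;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 */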
#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);
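
/*
 * Usage sketch (userspace): DCCP_SOCKOPT_GET_CUR_MPS above exposes
 * dccps_mss_cache, the largest payload a single sendmsg() may carry
 * before dccp_sendmsg() returns -EMSGSIZE.
 *
 *	int mps;
 *	socklen_t len = sizeof(mps);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &len);
 */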
#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
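
/*
 * Usage sketch (userspace): each send(2)/sendmsg(2) call produces exactly
 * one DCCP-Data packet, so writes must stay within the current MPS queried
 * above, and a full tx queue (sysctl_dccp_tx_qlen) surfaces as -EAGAIN.
 *
 *	ssize_t n = send(fd, buf, buf_len, 0);
 *
 *	if (n < 0 && errno == EMSGSIZE)
 *		fprintf(stderr, "datagram exceeds the current MPS\n");
 */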
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb, 0);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from a never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
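
/*
 * Usage sketch (userspace): recv(2)/recvmsg(2) returns at most one packet
 * per call; a buffer shorter than the packet sets MSG_TRUNC and the
 * remainder of that packet is dropped, and a CLOSE/RESET from the peer
 * reads as 0, as the loop above shows.
 *
 *	char buf[1500];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *
 *	if (n == 0)
 *		puts("connection closed by the peer");
 */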
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
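
/*
 * Usage sketch (userspace, hypothetical service code and address): a
 * minimal DCCP server reaching inet_dccp_listen() through listen(2).
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	const __be32 service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 */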
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
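
/*
 * Usage sketch (userspace): the SO_LINGER branch above. With linger enabled
 * and a zero timeout, close(2) disconnects immediately instead of entering
 * the CLOSING handshake.
 *
 *	struct linger lg = { .l_onoff = 1, .l_linger = 0 };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
 *	close(fd);
 */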
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
static int __init dccp_mib_init(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
}
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");