dccp tfrc: Let dccp_tfrc_lib do the sampling work
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / dccp / proto.c
blobecf3be961e11218894661af68996e4154e5a9854
1 /*
2 * net/dccp/proto.c
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41 EXPORT_SYMBOL_GPL(dccp_statistics);
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48 .lhash_lock = RW_LOCK_UNLOCKED,
49 .lhash_users = ATOMIC_INIT(0),
50 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
58 void dccp_set_state(struct sock *sk, const int state)
60 const int oldstate = sk->sk_state;
62 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
63 dccp_state_name(oldstate), dccp_state_name(state));
64 WARN_ON(state == oldstate);
66 switch (state) {
67 case DCCP_OPEN:
68 if (oldstate != DCCP_OPEN)
69 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70 /* Client retransmits all Confirm options until entering OPEN */
71 if (oldstate == DCCP_PARTOPEN)
72 dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
73 break;
75 case DCCP_CLOSED:
76 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
77 oldstate == DCCP_CLOSING)
78 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
80 sk->sk_prot->unhash(sk);
81 if (inet_csk(sk)->icsk_bind_hash != NULL &&
82 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
83 inet_put_port(sk);
84 /* fall through */
85 default:
86 if (oldstate == DCCP_OPEN)
87 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
90 /* Change state AFTER socket is unhashed to avoid closed
91 * socket sitting in hash tables.
93 sk->sk_state = state;
96 EXPORT_SYMBOL_GPL(dccp_set_state);
98 static void dccp_finish_passive_close(struct sock *sk)
100 switch (sk->sk_state) {
101 case DCCP_PASSIVE_CLOSE:
102 /* Node (client or server) has received Close packet. */
103 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
104 dccp_set_state(sk, DCCP_CLOSED);
105 break;
106 case DCCP_PASSIVE_CLOSEREQ:
108 * Client received CloseReq. We set the `active' flag so that
109 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
111 dccp_send_close(sk, 1);
112 dccp_set_state(sk, DCCP_CLOSING);
116 void dccp_done(struct sock *sk)
118 dccp_set_state(sk, DCCP_CLOSED);
119 dccp_clear_xmit_timers(sk);
121 sk->sk_shutdown = SHUTDOWN_MASK;
123 if (!sock_flag(sk, SOCK_DEAD))
124 sk->sk_state_change(sk);
125 else
126 inet_csk_destroy_sock(sk);
129 EXPORT_SYMBOL_GPL(dccp_done);
131 const char *dccp_packet_name(const int type)
133 static const char *dccp_packet_names[] = {
134 [DCCP_PKT_REQUEST] = "REQUEST",
135 [DCCP_PKT_RESPONSE] = "RESPONSE",
136 [DCCP_PKT_DATA] = "DATA",
137 [DCCP_PKT_ACK] = "ACK",
138 [DCCP_PKT_DATAACK] = "DATAACK",
139 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
140 [DCCP_PKT_CLOSE] = "CLOSE",
141 [DCCP_PKT_RESET] = "RESET",
142 [DCCP_PKT_SYNC] = "SYNC",
143 [DCCP_PKT_SYNCACK] = "SYNCACK",
146 if (type >= DCCP_NR_PKT_TYPES)
147 return "INVALID";
148 else
149 return dccp_packet_names[type];
152 EXPORT_SYMBOL_GPL(dccp_packet_name);
154 const char *dccp_state_name(const int state)
156 static char *dccp_state_names[] = {
157 [DCCP_OPEN] = "OPEN",
158 [DCCP_REQUESTING] = "REQUESTING",
159 [DCCP_PARTOPEN] = "PARTOPEN",
160 [DCCP_LISTEN] = "LISTEN",
161 [DCCP_RESPOND] = "RESPOND",
162 [DCCP_CLOSING] = "CLOSING",
163 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
164 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
165 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
166 [DCCP_TIME_WAIT] = "TIME_WAIT",
167 [DCCP_CLOSED] = "CLOSED",
170 if (state >= DCCP_MAX_STATES)
171 return "INVALID STATE!";
172 else
173 return dccp_state_names[state];
176 EXPORT_SYMBOL_GPL(dccp_state_name);
178 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
180 struct dccp_sock *dp = dccp_sk(sk);
181 struct inet_connection_sock *icsk = inet_csk(sk);
183 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
184 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
185 sk->sk_state = DCCP_CLOSED;
186 sk->sk_write_space = dccp_write_space;
187 icsk->icsk_sync_mss = dccp_sync_mss;
188 dp->dccps_mss_cache = TCP_MIN_RCVMSS;
189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
194 dccp_init_xmit_timers(sk);
196 INIT_LIST_HEAD(&dp->dccps_featneg);
197 /* control socket doesn't need feat nego */
198 if (likely(ctl_sock_initialized))
199 return dccp_feat_init(sk);
200 return 0;
203 EXPORT_SYMBOL_GPL(dccp_init_sock);
205 void dccp_destroy_sock(struct sock *sk)
207 struct dccp_sock *dp = dccp_sk(sk);
210 * DCCP doesn't use sk_write_queue, just sk_send_head
211 * for retransmissions
213 if (sk->sk_send_head != NULL) {
214 kfree_skb(sk->sk_send_head);
215 sk->sk_send_head = NULL;
218 /* Clean up a referenced DCCP bind bucket. */
219 if (inet_csk(sk)->icsk_bind_hash != NULL)
220 inet_put_port(sk);
222 kfree(dp->dccps_service_list);
223 dp->dccps_service_list = NULL;
225 if (dp->dccps_hc_rx_ackvec != NULL) {
226 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
227 dp->dccps_hc_rx_ackvec = NULL;
229 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
230 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
231 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
233 /* clean up feature negotiation state */
234 dccp_feat_list_purge(&dp->dccps_featneg);
237 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
239 static inline int dccp_listen_start(struct sock *sk, int backlog)
241 struct dccp_sock *dp = dccp_sk(sk);
243 dp->dccps_role = DCCP_ROLE_LISTEN;
244 /* do not start to listen if feature negotiation setup fails */
245 if (dccp_feat_finalise_settings(dp))
246 return -EPROTO;
247 return inet_csk_listen_start(sk, backlog);
250 static inline int dccp_need_reset(int state)
252 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
253 state != DCCP_REQUESTING;
256 int dccp_disconnect(struct sock *sk, int flags)
258 struct inet_connection_sock *icsk = inet_csk(sk);
259 struct inet_sock *inet = inet_sk(sk);
260 int err = 0;
261 const int old_state = sk->sk_state;
263 if (old_state != DCCP_CLOSED)
264 dccp_set_state(sk, DCCP_CLOSED);
267 * This corresponds to the ABORT function of RFC793, sec. 3.8
268 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
270 if (old_state == DCCP_LISTEN) {
271 inet_csk_listen_stop(sk);
272 } else if (dccp_need_reset(old_state)) {
273 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
274 sk->sk_err = ECONNRESET;
275 } else if (old_state == DCCP_REQUESTING)
276 sk->sk_err = ECONNRESET;
278 dccp_clear_xmit_timers(sk);
280 __skb_queue_purge(&sk->sk_receive_queue);
281 __skb_queue_purge(&sk->sk_write_queue);
282 if (sk->sk_send_head != NULL) {
283 __kfree_skb(sk->sk_send_head);
284 sk->sk_send_head = NULL;
287 inet->dport = 0;
289 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
290 inet_reset_saddr(sk);
292 sk->sk_shutdown = 0;
293 sock_reset_flag(sk, SOCK_DONE);
295 icsk->icsk_backoff = 0;
296 inet_csk_delack_init(sk);
297 __sk_dst_reset(sk);
299 WARN_ON(inet->num && !icsk->icsk_bind_hash);
301 sk->sk_error_report(sk);
302 return err;
305 EXPORT_SYMBOL_GPL(dccp_disconnect);
308 * Wait for a DCCP event.
310 * Note that we don't need to lock the socket, as the upper poll layers
311 * take care of normal races (between the test and the event) and we don't
312 * go look at any of the socket buffers directly.
314 unsigned int dccp_poll(struct file *file, struct socket *sock,
315 poll_table *wait)
317 unsigned int mask;
318 struct sock *sk = sock->sk;
320 poll_wait(file, sk->sk_sleep, wait);
321 if (sk->sk_state == DCCP_LISTEN)
322 return inet_csk_listen_poll(sk);
324 /* Socket is not locked. We are protected from async events
325 by poll logic and correct handling of state changes
326 made by another threads is impossible in any case.
329 mask = 0;
330 if (sk->sk_err)
331 mask = POLLERR;
333 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
334 mask |= POLLHUP;
335 if (sk->sk_shutdown & RCV_SHUTDOWN)
336 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
338 /* Connected? */
339 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
340 if (atomic_read(&sk->sk_rmem_alloc) > 0)
341 mask |= POLLIN | POLLRDNORM;
343 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
344 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
345 mask |= POLLOUT | POLLWRNORM;
346 } else { /* send SIGIO later */
347 set_bit(SOCK_ASYNC_NOSPACE,
348 &sk->sk_socket->flags);
349 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
351 /* Race breaker. If space is freed after
352 * wspace test but before the flags are set,
353 * IO signal will be lost.
355 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
356 mask |= POLLOUT | POLLWRNORM;
360 return mask;
363 EXPORT_SYMBOL_GPL(dccp_poll);
365 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
367 int rc = -ENOTCONN;
369 lock_sock(sk);
371 if (sk->sk_state == DCCP_LISTEN)
372 goto out;
374 switch (cmd) {
375 case SIOCINQ: {
376 struct sk_buff *skb;
377 unsigned long amount = 0;
379 skb = skb_peek(&sk->sk_receive_queue);
380 if (skb != NULL) {
382 * We will only return the amount of this packet since
383 * that is all that will be read.
385 amount = skb->len;
387 rc = put_user(amount, (int __user *)arg);
389 break;
390 default:
391 rc = -ENOIOCTLCMD;
392 break;
394 out:
395 release_sock(sk);
396 return rc;
399 EXPORT_SYMBOL_GPL(dccp_ioctl);
401 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
402 char __user *optval, int optlen)
404 struct dccp_sock *dp = dccp_sk(sk);
405 struct dccp_service_list *sl = NULL;
407 if (service == DCCP_SERVICE_INVALID_VALUE ||
408 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
409 return -EINVAL;
411 if (optlen > sizeof(service)) {
412 sl = kmalloc(optlen, GFP_KERNEL);
413 if (sl == NULL)
414 return -ENOMEM;
416 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
417 if (copy_from_user(sl->dccpsl_list,
418 optval + sizeof(service),
419 optlen - sizeof(service)) ||
420 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
421 kfree(sl);
422 return -EFAULT;
426 lock_sock(sk);
427 dp->dccps_service = service;
429 kfree(dp->dccps_service_list);
431 dp->dccps_service_list = sl;
432 release_sock(sk);
433 return 0;
436 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
438 u8 *list, len;
439 int i, rc;
441 if (cscov < 0 || cscov > 15)
442 return -EINVAL;
444 * Populate a list of permissible values, in the range cscov...15. This
445 * is necessary since feature negotiation of single values only works if
446 * both sides incidentally choose the same value. Since the list starts
447 * lowest-value first, negotiation will pick the smallest shared value.
449 if (cscov == 0)
450 return 0;
451 len = 16 - cscov;
453 list = kmalloc(len, GFP_KERNEL);
454 if (list == NULL)
455 return -ENOBUFS;
457 for (i = 0; i < len; i++)
458 list[i] = cscov++;
460 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
462 if (rc == 0) {
463 if (rx)
464 dccp_sk(sk)->dccps_pcrlen = cscov;
465 else
466 dccp_sk(sk)->dccps_pcslen = cscov;
468 kfree(list);
469 return rc;
472 static int dccp_setsockopt_ccid(struct sock *sk, int type,
473 char __user *optval, int optlen)
475 u8 *val;
476 int rc = 0;
478 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
479 return -EINVAL;
481 val = kmalloc(optlen, GFP_KERNEL);
482 if (val == NULL)
483 return -ENOMEM;
485 if (copy_from_user(val, optval, optlen)) {
486 kfree(val);
487 return -EFAULT;
490 lock_sock(sk);
491 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
492 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
494 if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
495 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
496 release_sock(sk);
498 kfree(val);
499 return rc;
502 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
503 char __user *optval, int optlen)
505 struct dccp_sock *dp = dccp_sk(sk);
506 int val, err = 0;
508 switch (optname) {
509 case DCCP_SOCKOPT_PACKET_SIZE:
510 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
511 return 0;
512 case DCCP_SOCKOPT_CHANGE_L:
513 case DCCP_SOCKOPT_CHANGE_R:
514 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
515 return 0;
516 case DCCP_SOCKOPT_CCID:
517 case DCCP_SOCKOPT_RX_CCID:
518 case DCCP_SOCKOPT_TX_CCID:
519 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
522 if (optlen < (int)sizeof(int))
523 return -EINVAL;
525 if (get_user(val, (int __user *)optval))
526 return -EFAULT;
528 if (optname == DCCP_SOCKOPT_SERVICE)
529 return dccp_setsockopt_service(sk, val, optval, optlen);
531 lock_sock(sk);
532 switch (optname) {
533 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
534 if (dp->dccps_role != DCCP_ROLE_SERVER)
535 err = -EOPNOTSUPP;
536 else
537 dp->dccps_server_timewait = (val != 0);
538 break;
539 case DCCP_SOCKOPT_SEND_CSCOV:
540 err = dccp_setsockopt_cscov(sk, val, false);
541 break;
542 case DCCP_SOCKOPT_RECV_CSCOV:
543 err = dccp_setsockopt_cscov(sk, val, true);
544 break;
545 case DCCP_SOCKOPT_QPOLICY_ID:
546 if (sk->sk_state != DCCP_CLOSED)
547 err = -EISCONN;
548 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
549 err = -EINVAL;
550 else
551 dp->dccps_qpolicy = val;
552 break;
553 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
554 if (val < 0)
555 err = -EINVAL;
556 else
557 dp->dccps_tx_qlen = val;
558 break;
559 default:
560 err = -ENOPROTOOPT;
561 break;
563 release_sock(sk);
565 return err;
568 int dccp_setsockopt(struct sock *sk, int level, int optname,
569 char __user *optval, int optlen)
571 if (level != SOL_DCCP)
572 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
573 optname, optval,
574 optlen);
575 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
578 EXPORT_SYMBOL_GPL(dccp_setsockopt);
#ifdef CONFIG_COMPAT
/*
 * Compat setsockopt entry point: SOL_DCCP options go to
 * do_dccp_setsockopt(), all other levels to the inet_csk compat handler.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
593 static int dccp_getsockopt_service(struct sock *sk, int len,
594 __be32 __user *optval,
595 int __user *optlen)
597 const struct dccp_sock *dp = dccp_sk(sk);
598 const struct dccp_service_list *sl;
599 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
601 lock_sock(sk);
602 if ((sl = dp->dccps_service_list) != NULL) {
603 slen = sl->dccpsl_nr * sizeof(u32);
604 total_len += slen;
607 err = -EINVAL;
608 if (total_len > len)
609 goto out;
611 err = 0;
612 if (put_user(total_len, optlen) ||
613 put_user(dp->dccps_service, optval) ||
614 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
615 err = -EFAULT;
616 out:
617 release_sock(sk);
618 return err;
621 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
622 char __user *optval, int __user *optlen)
624 struct dccp_sock *dp;
625 int val, len;
627 if (get_user(len, optlen))
628 return -EFAULT;
630 if (len < (int)sizeof(int))
631 return -EINVAL;
633 dp = dccp_sk(sk);
635 switch (optname) {
636 case DCCP_SOCKOPT_PACKET_SIZE:
637 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
638 return 0;
639 case DCCP_SOCKOPT_SERVICE:
640 return dccp_getsockopt_service(sk, len,
641 (__be32 __user *)optval, optlen);
642 case DCCP_SOCKOPT_GET_CUR_MPS:
643 val = dp->dccps_mss_cache;
644 break;
645 case DCCP_SOCKOPT_AVAILABLE_CCIDS:
646 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
647 case DCCP_SOCKOPT_TX_CCID:
648 val = ccid_get_current_tx_ccid(dp);
649 if (val < 0)
650 return -ENOPROTOOPT;
651 break;
652 case DCCP_SOCKOPT_RX_CCID:
653 val = ccid_get_current_rx_ccid(dp);
654 if (val < 0)
655 return -ENOPROTOOPT;
656 break;
657 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
658 val = dp->dccps_server_timewait;
659 break;
660 case DCCP_SOCKOPT_SEND_CSCOV:
661 val = dp->dccps_pcslen;
662 break;
663 case DCCP_SOCKOPT_RECV_CSCOV:
664 val = dp->dccps_pcrlen;
665 break;
666 case DCCP_SOCKOPT_QPOLICY_ID:
667 val = dp->dccps_qpolicy;
668 break;
669 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
670 val = dp->dccps_tx_qlen;
671 break;
672 case 128 ... 191:
673 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
674 len, (u32 __user *)optval, optlen);
675 case 192 ... 255:
676 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
677 len, (u32 __user *)optval, optlen);
678 default:
679 return -ENOPROTOOPT;
682 len = sizeof(val);
683 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
684 return -EFAULT;
686 return 0;
689 int dccp_getsockopt(struct sock *sk, int level, int optname,
690 char __user *optval, int __user *optlen)
692 if (level != SOL_DCCP)
693 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
694 optname, optval,
695 optlen);
696 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
699 EXPORT_SYMBOL_GPL(dccp_getsockopt);
#ifdef CONFIG_COMPAT
/*
 * Compat getsockopt entry point: SOL_DCCP options go to
 * do_dccp_getsockopt(), all other levels to the inet_csk compat handler.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
714 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
716 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
719 * Assign an (opaque) qpolicy priority value to skb->priority.
721 * We are overloading this skb field for use with the qpolicy subystem.
722 * The skb->priority is normally used for the SO_PRIORITY option, which
723 * is initialised from sk_priority. Since the assignment of sk_priority
724 * to skb->priority happens later (on layer 3), we overload this field
725 * for use with queueing priorities as long as the skb is on layer 4.
726 * The default priority value (if nothing is set) is 0.
728 skb->priority = 0;
730 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
732 if (!CMSG_OK(msg, cmsg))
733 return -EINVAL;
735 if (cmsg->cmsg_level != SOL_DCCP)
736 continue;
738 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
739 !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
740 return -EINVAL;
742 switch (cmsg->cmsg_type) {
743 case DCCP_SCM_PRIORITY:
744 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
745 return -EINVAL;
746 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
747 break;
748 default:
749 return -EINVAL;
752 return 0;
755 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
756 size_t len)
758 const struct dccp_sock *dp = dccp_sk(sk);
759 const int flags = msg->msg_flags;
760 const int noblock = flags & MSG_DONTWAIT;
761 struct sk_buff *skb;
762 int rc, size;
763 long timeo;
765 if (len > dp->dccps_mss_cache)
766 return -EMSGSIZE;
768 lock_sock(sk);
770 if (dccp_qpolicy_full(sk)) {
771 rc = -EAGAIN;
772 goto out_release;
775 timeo = sock_sndtimeo(sk, noblock);
778 * We have to use sk_stream_wait_connect here to set sk_write_pending,
779 * so that the trick in dccp_rcv_request_sent_state_process.
781 /* Wait for a connection to finish. */
782 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
783 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
784 goto out_release;
786 size = sk->sk_prot->max_header + len;
787 release_sock(sk);
788 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
789 lock_sock(sk);
790 if (skb == NULL)
791 goto out_release;
793 skb_reserve(skb, sk->sk_prot->max_header);
794 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
795 if (rc != 0)
796 goto out_discard;
798 rc = dccp_msghdr_parse(msg, skb);
799 if (rc != 0)
800 goto out_discard;
802 dccp_qpolicy_push(sk, skb);
803 dccp_write_xmit(sk);
804 out_release:
805 release_sock(sk);
806 return rc ? : len;
807 out_discard:
808 kfree_skb(skb);
809 goto out_release;
812 EXPORT_SYMBOL_GPL(dccp_sendmsg);
814 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
815 size_t len, int nonblock, int flags, int *addr_len)
817 const struct dccp_hdr *dh;
818 long timeo;
820 lock_sock(sk);
822 if (sk->sk_state == DCCP_LISTEN) {
823 len = -ENOTCONN;
824 goto out;
827 timeo = sock_rcvtimeo(sk, nonblock);
829 do {
830 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
832 if (skb == NULL)
833 goto verify_sock_status;
835 dh = dccp_hdr(skb);
837 switch (dh->dccph_type) {
838 case DCCP_PKT_DATA:
839 case DCCP_PKT_DATAACK:
840 goto found_ok_skb;
842 case DCCP_PKT_CLOSE:
843 case DCCP_PKT_CLOSEREQ:
844 if (!(flags & MSG_PEEK))
845 dccp_finish_passive_close(sk);
846 /* fall through */
847 case DCCP_PKT_RESET:
848 dccp_pr_debug("found fin (%s) ok!\n",
849 dccp_packet_name(dh->dccph_type));
850 len = 0;
851 goto found_fin_ok;
852 default:
853 dccp_pr_debug("packet_type=%s\n",
854 dccp_packet_name(dh->dccph_type));
855 sk_eat_skb(sk, skb, 0);
857 verify_sock_status:
858 if (sock_flag(sk, SOCK_DONE)) {
859 len = 0;
860 break;
863 if (sk->sk_err) {
864 len = sock_error(sk);
865 break;
868 if (sk->sk_shutdown & RCV_SHUTDOWN) {
869 len = 0;
870 break;
873 if (sk->sk_state == DCCP_CLOSED) {
874 if (!sock_flag(sk, SOCK_DONE)) {
875 /* This occurs when user tries to read
876 * from never connected socket.
878 len = -ENOTCONN;
879 break;
881 len = 0;
882 break;
885 if (!timeo) {
886 len = -EAGAIN;
887 break;
890 if (signal_pending(current)) {
891 len = sock_intr_errno(timeo);
892 break;
895 sk_wait_data(sk, &timeo);
896 continue;
897 found_ok_skb:
898 if (len > skb->len)
899 len = skb->len;
900 else if (len < skb->len)
901 msg->msg_flags |= MSG_TRUNC;
903 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
904 /* Exception. Bailout! */
905 len = -EFAULT;
906 break;
908 found_fin_ok:
909 if (!(flags & MSG_PEEK))
910 sk_eat_skb(sk, skb, 0);
911 break;
912 } while (1);
913 out:
914 release_sock(sk);
915 return len;
918 EXPORT_SYMBOL_GPL(dccp_recvmsg);
920 int inet_dccp_listen(struct socket *sock, int backlog)
922 struct sock *sk = sock->sk;
923 unsigned char old_state;
924 int err;
926 lock_sock(sk);
928 err = -EINVAL;
929 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
930 goto out;
932 old_state = sk->sk_state;
933 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
934 goto out;
936 /* Really, if the socket is already in listen state
937 * we can only allow the backlog to be adjusted.
939 if (old_state != DCCP_LISTEN) {
941 * FIXME: here it probably should be sk->sk_prot->listen_start
942 * see tcp_listen_start
944 err = dccp_listen_start(sk, backlog);
945 if (err)
946 goto out;
948 sk->sk_max_ack_backlog = backlog;
949 err = 0;
951 out:
952 release_sock(sk);
953 return err;
956 EXPORT_SYMBOL_GPL(inet_dccp_listen);
958 static void dccp_terminate_connection(struct sock *sk)
960 u8 next_state = DCCP_CLOSED;
962 switch (sk->sk_state) {
963 case DCCP_PASSIVE_CLOSE:
964 case DCCP_PASSIVE_CLOSEREQ:
965 dccp_finish_passive_close(sk);
966 break;
967 case DCCP_PARTOPEN:
968 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
969 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
970 /* fall through */
971 case DCCP_OPEN:
972 dccp_send_close(sk, 1);
974 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
975 !dccp_sk(sk)->dccps_server_timewait)
976 next_state = DCCP_ACTIVE_CLOSEREQ;
977 else
978 next_state = DCCP_CLOSING;
979 /* fall through */
980 default:
981 dccp_set_state(sk, next_state);
985 void dccp_close(struct sock *sk, long timeout)
987 struct dccp_sock *dp = dccp_sk(sk);
988 struct sk_buff *skb;
989 u32 data_was_unread = 0;
990 int state;
992 lock_sock(sk);
994 sk->sk_shutdown = SHUTDOWN_MASK;
996 if (sk->sk_state == DCCP_LISTEN) {
997 dccp_set_state(sk, DCCP_CLOSED);
999 /* Special case. */
1000 inet_csk_listen_stop(sk);
1002 goto adjudge_to_death;
1005 sk_stop_timer(sk, &dp->dccps_xmit_timer);
1008 * We need to flush the recv. buffs. We do this only on the
1009 * descriptor close, not protocol-sourced closes, because the
1010 *reader process may not have drained the data yet!
1012 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1013 data_was_unread += skb->len;
1014 __kfree_skb(skb);
1017 if (data_was_unread) {
1018 /* Unread data was tossed, send an appropriate Reset Code */
1019 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
1020 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1021 dccp_set_state(sk, DCCP_CLOSED);
1022 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1023 /* Check zero linger _after_ checking for unread data. */
1024 sk->sk_prot->disconnect(sk, 0);
1025 } else if (sk->sk_state != DCCP_CLOSED) {
1027 * Normal connection termination. May need to wait if there are
1028 * still packets in the TX queue that are delayed by the CCID.
1030 dccp_flush_write_queue(sk, &timeout);
1031 dccp_terminate_connection(sk);
1035 * Flush write queue. This may be necessary in several cases:
1036 * - we have been closed by the peer but still have application data;
1037 * - abortive termination (unread data or zero linger time),
1038 * - normal termination but queue could not be flushed within time limit
1040 __skb_queue_purge(&sk->sk_write_queue);
1042 sk_stream_wait_close(sk, timeout);
1044 adjudge_to_death:
1045 state = sk->sk_state;
1046 sock_hold(sk);
1047 sock_orphan(sk);
1048 atomic_inc(sk->sk_prot->orphan_count);
1051 * It is the last release_sock in its life. It will remove backlog.
1053 release_sock(sk);
1055 * Now socket is owned by kernel and we acquire BH lock
1056 * to finish close. No need to check for user refs.
1058 local_bh_disable();
1059 bh_lock_sock(sk);
1060 WARN_ON(sock_owned_by_user(sk));
1062 /* Have we already been destroyed by a softirq or backlog? */
1063 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1064 goto out;
1066 if (sk->sk_state == DCCP_CLOSED)
1067 inet_csk_destroy_sock(sk);
1069 /* Otherwise, socket is reprieved until protocol close. */
1071 out:
1072 bh_unlock_sock(sk);
1073 local_bh_enable();
1074 sock_put(sk);
1077 EXPORT_SYMBOL_GPL(dccp_close);
/**
 * dccp_shutdown  -  shutdown() hook for DCCP sockets
 * @sk:  socket the call is made on
 * @how: shutdown mode requested by the caller
 *
 * Only logs the request; no socket state is changed here.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1086 static inline int dccp_mib_init(void)
1088 return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1091 static inline void dccp_mib_exit(void)
1093 snmp_mib_free((void**)dccp_statistics);
1096 static int thash_entries;
1097 module_param(thash_entries, int, 0444);
1098 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1100 #ifdef CONFIG_IP_DCCP_DEBUG
1101 int dccp_debug;
1102 module_param(dccp_debug, bool, 0644);
1103 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1105 EXPORT_SYMBOL_GPL(dccp_debug);
1106 #endif
1108 static int __init dccp_init(void)
1110 unsigned long goal;
1111 int ehash_order, bhash_order, i;
1112 int rc = -ENOBUFS;
1114 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1115 FIELD_SIZEOF(struct sk_buff, cb));
1117 dccp_hashinfo.bind_bucket_cachep =
1118 kmem_cache_create("dccp_bind_bucket",
1119 sizeof(struct inet_bind_bucket), 0,
1120 SLAB_HWCACHE_ALIGN, NULL);
1121 if (!dccp_hashinfo.bind_bucket_cachep)
1122 goto out;
1125 * Size and allocate the main established and bind bucket
1126 * hash tables.
1128 * The methodology is similar to that of the buffer cache.
1130 if (num_physpages >= (128 * 1024))
1131 goal = num_physpages >> (21 - PAGE_SHIFT);
1132 else
1133 goal = num_physpages >> (23 - PAGE_SHIFT);
1135 if (thash_entries)
1136 goal = (thash_entries *
1137 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1138 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1140 do {
1141 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1142 sizeof(struct inet_ehash_bucket);
1143 while (dccp_hashinfo.ehash_size &
1144 (dccp_hashinfo.ehash_size - 1))
1145 dccp_hashinfo.ehash_size--;
1146 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1147 __get_free_pages(GFP_ATOMIC, ehash_order);
1148 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1150 if (!dccp_hashinfo.ehash) {
1151 DCCP_CRIT("Failed to allocate DCCP established hash table");
1152 goto out_free_bind_bucket_cachep;
1155 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1156 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1157 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1160 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1161 goto out_free_dccp_ehash;
1163 bhash_order = ehash_order;
1165 do {
1166 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1167 sizeof(struct inet_bind_hashbucket);
1168 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1169 bhash_order > 0)
1170 continue;
1171 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1172 __get_free_pages(GFP_ATOMIC, bhash_order);
1173 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1175 if (!dccp_hashinfo.bhash) {
1176 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1177 goto out_free_dccp_locks;
1180 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1181 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1182 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1185 rc = dccp_mib_init();
1186 if (rc)
1187 goto out_free_dccp_bhash;
1189 rc = dccp_ackvec_init();
1190 if (rc)
1191 goto out_free_dccp_mib;
1193 rc = dccp_sysctl_init();
1194 if (rc)
1195 goto out_ackvec_exit;
1197 dccp_timestamping_init();
1198 out:
1199 return rc;
1200 out_ackvec_exit:
1201 dccp_ackvec_exit();
1202 out_free_dccp_mib:
1203 dccp_mib_exit();
1204 out_free_dccp_bhash:
1205 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1206 dccp_hashinfo.bhash = NULL;
1207 out_free_dccp_locks:
1208 inet_ehash_locks_free(&dccp_hashinfo);
1209 out_free_dccp_ehash:
1210 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1211 dccp_hashinfo.ehash = NULL;
1212 out_free_bind_bucket_cachep:
1213 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1214 dccp_hashinfo.bind_bucket_cachep = NULL;
1215 goto out;
1218 static void __exit dccp_fini(void)
1220 dccp_mib_exit();
1221 free_pages((unsigned long)dccp_hashinfo.bhash,
1222 get_order(dccp_hashinfo.bhash_size *
1223 sizeof(struct inet_bind_hashbucket)));
1224 free_pages((unsigned long)dccp_hashinfo.ehash,
1225 get_order(dccp_hashinfo.ehash_size *
1226 sizeof(struct inet_ehash_bucket)));
1227 inet_ehash_locks_free(&dccp_hashinfo);
1228 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1229 dccp_ackvec_exit();
1230 dccp_sysctl_exit();
1233 module_init(dccp_init);
1234 module_exit(dccp_fini);
1236 MODULE_LICENSE("GPL");
1237 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1238 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");