/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/protocol.h>
#include <net/sock.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "dccp.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};
const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);
const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);
static inline int dccp_listen_start(struct sock *sk)
{
	dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC 793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);

	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	dccp_pr_debug("entry\n");

	if (level != SOL_DCCP)
		return ip_setsockopt(sk, level, optname, optval, optlen);

	return -EOPNOTSUPP;
}
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	dccp_pr_debug("entry\n");

	if (level != SOL_DCCP)
		return ip_getsockopt(sk, level, optname, optval, optlen);

	return -EOPNOTSUPP;
}
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, len);
out_release:
	release_sock(sk);
	return rc;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
static int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;
out:
	release_sock(sk);
	return err;
}
static const unsigned char dccp_new_state[] = {
	/* current state:	new state:	action:	*/
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
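
/*
 * Each entry above packs the next state into the DCCP_STATE_MASK bits and
 * optionally sets DCCP_ACTION_FIN; dccp_close_state() below returns that
 * flag to tell dccp_close() that a closing packet still has to be sent.
 */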
static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	if (sk->sk_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	atomic_inc(&dccp_orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
static struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	.poll		= sock_no_poll,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
};
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
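/*
 * (An OOTB packet is one that matches no existing connection; the DCCP
 * spec normally has such packets answered with a Reset, and that Reset
 * is transmitted through this socket.)
 */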
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
static int __init dccp_ctl_sock_init(void)
{
	int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
				  &dccp_ctl_socket);
	if (rc < 0)
		printk(dccp_ctl_socket_err_msg);
	else {
		dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
		inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

		/* Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
	}

	return rc;
}
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
static int __init init_dccp_v4_mibs(void)
{
	int rc = -ENOBUFS;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_v4_prot, 1);

	if (rc)
		goto out;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;
	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
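	/*
	 * Rough numbers, assuming 4KB pages (PAGE_SHIFT == 12): a machine
	 * with at least 128K pages (512MB) gets goal = num_physpages >> 9,
	 * i.e. one page of ehash buckets per 2MB of RAM; smaller machines
	 * get one page per 8MB. A non-zero thash_entries module parameter
	 * overrides this heuristic.
	 */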
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;

	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					   sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		/* Round the table size down to a power of two. */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
		       "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}
	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;
	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					   sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}
	rc = -ENOBUFS;
	if (init_dccp_v4_mibs())
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_unregister_protosw;
out:
	return rc;
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_v4_prot);
	goto out;
}
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);
/*
 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP
 * (33) values directly. Also cover the case where the protocol is not
 * specified, i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
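/*
 * For reference: a socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP) call made before
 * this module is loaded causes the socket core to request a module named
 * along the lines of "net-pf-2-proto-33-type-6" (PF_INET stringifies to 2),
 * which the aliases below resolve to this module.
 */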
691 MODULE_ALIAS("net-pf-" __stringify(PF_INET
) "-proto-33-type-6");
692 MODULE_ALIAS("net-pf-" __stringify(PF_INET
) "-proto-0-type-6");
693 MODULE_LICENSE("GPL");
694 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
695 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");