/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
26 #include <net/inet_common.h>
28 #include <net/protocol.h>
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
/* Per-CPU DCCP SNMP/MIB counters; the percpu storage is allocated in
 * init_dccp_v4_mibs() and freed on the dccp_init() error paths. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);
/* Count of DCCP sockets orphaned in dccp_close() (owner gone, protocol
 * still finishing the close handshake). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);
46 static struct net_protocol dccp_protocol
= {
47 .handler
= dccp_v4_rcv
,
48 .err_handler
= dccp_v4_err
,
51 const char *dccp_packet_name(const int type
)
53 static const char *dccp_packet_names
[] = {
54 [DCCP_PKT_REQUEST
] = "REQUEST",
55 [DCCP_PKT_RESPONSE
] = "RESPONSE",
56 [DCCP_PKT_DATA
] = "DATA",
57 [DCCP_PKT_ACK
] = "ACK",
58 [DCCP_PKT_DATAACK
] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ
] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE
] = "CLOSE",
61 [DCCP_PKT_RESET
] = "RESET",
62 [DCCP_PKT_SYNC
] = "SYNC",
63 [DCCP_PKT_SYNCACK
] = "SYNCACK",
66 if (type
>= DCCP_NR_PKT_TYPES
)
69 return dccp_packet_names
[type
];
72 EXPORT_SYMBOL_GPL(dccp_packet_name
);
74 const char *dccp_state_name(const int state
)
76 static char *dccp_state_names
[] = {
78 [DCCP_REQUESTING
] = "REQUESTING",
79 [DCCP_PARTOPEN
] = "PARTOPEN",
80 [DCCP_LISTEN
] = "LISTEN",
81 [DCCP_RESPOND
] = "RESPOND",
82 [DCCP_CLOSING
] = "CLOSING",
83 [DCCP_TIME_WAIT
] = "TIME_WAIT",
84 [DCCP_CLOSED
] = "CLOSED",
87 if (state
>= DCCP_MAX_STATES
)
88 return "INVALID STATE!";
90 return dccp_state_names
[state
];
93 EXPORT_SYMBOL_GPL(dccp_state_name
);
95 static inline int dccp_listen_start(struct sock
*sk
)
97 dccp_sk(sk
)->dccps_role
= DCCP_ROLE_LISTEN
;
98 return inet_csk_listen_start(sk
, TCP_SYNQ_HSIZE
);
101 int dccp_disconnect(struct sock
*sk
, int flags
)
103 struct inet_connection_sock
*icsk
= inet_csk(sk
);
104 struct inet_sock
*inet
= inet_sk(sk
);
106 const int old_state
= sk
->sk_state
;
108 if (old_state
!= DCCP_CLOSED
)
109 dccp_set_state(sk
, DCCP_CLOSED
);
111 /* ABORT function of RFC793 */
112 if (old_state
== DCCP_LISTEN
) {
113 inet_csk_listen_stop(sk
);
114 /* FIXME: do the active reset thing */
115 } else if (old_state
== DCCP_REQUESTING
)
116 sk
->sk_err
= ECONNRESET
;
118 dccp_clear_xmit_timers(sk
);
119 __skb_queue_purge(&sk
->sk_receive_queue
);
120 if (sk
->sk_send_head
!= NULL
) {
121 __kfree_skb(sk
->sk_send_head
);
122 sk
->sk_send_head
= NULL
;
127 if (!(sk
->sk_userlocks
& SOCK_BINDADDR_LOCK
))
128 inet_reset_saddr(sk
);
131 sock_reset_flag(sk
, SOCK_DONE
);
133 icsk
->icsk_backoff
= 0;
134 inet_csk_delack_init(sk
);
137 BUG_TRAP(!inet
->num
|| icsk
->icsk_bind_hash
);
139 sk
->sk_error_report(sk
);
143 int dccp_ioctl(struct sock
*sk
, int cmd
, unsigned long arg
)
145 dccp_pr_debug("entry\n");
149 int dccp_setsockopt(struct sock
*sk
, int level
, int optname
,
150 char *optval
, int optlen
)
152 dccp_pr_debug("entry\n");
154 if (level
!= SOL_DCCP
)
155 return ip_setsockopt(sk
, level
, optname
, optval
, optlen
);
160 int dccp_getsockopt(struct sock
*sk
, int level
, int optname
,
161 char *optval
, int *optlen
)
163 dccp_pr_debug("entry\n");
165 if (level
!= SOL_DCCP
)
166 return ip_getsockopt(sk
, level
, optname
, optval
, optlen
);
171 int dccp_sendmsg(struct kiocb
*iocb
, struct sock
*sk
, struct msghdr
*msg
,
174 const struct dccp_sock
*dp
= dccp_sk(sk
);
175 const int flags
= msg
->msg_flags
;
176 const int noblock
= flags
& MSG_DONTWAIT
;
181 if (len
> dp
->dccps_mss_cache
)
186 timeo
= sock_sndtimeo(sk
, flags
& MSG_DONTWAIT
);
189 * We have to use sk_stream_wait_connect here to set sk_write_pending,
190 * so that the trick in dccp_rcv_request_sent_state_process.
192 /* Wait for a connection to finish. */
193 if ((1 << sk
->sk_state
) & ~(DCCPF_OPEN
| DCCPF_PARTOPEN
| DCCPF_CLOSING
))
194 if ((rc
= sk_stream_wait_connect(sk
, &timeo
)) != 0)
197 size
= sk
->sk_prot
->max_header
+ len
;
199 skb
= sock_alloc_send_skb(sk
, size
, noblock
, &rc
);
205 skb_reserve(skb
, sk
->sk_prot
->max_header
);
206 rc
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
208 struct dccp_skb_cb
*dcb
= DCCP_SKB_CB(skb
);
209 const struct dccp_ackpkts
*ap
= dp
->dccps_hc_rx_ackpkts
;
213 * XXX: This is just to match the Waikato tree CA interaction
214 * points, after the CCID3 code is stable and I have a better
215 * understanding of behaviour I'll change this to look more like
219 rc
= ccid_hc_tx_send_packet(dp
->dccps_hc_tx_ccid
, sk
,
228 delay
= schedule_timeout(delay
);
231 if (signal_pending(current
))
232 goto out_interrupted
;
234 if (!(sk
->sk_state
== DCCP_PARTOPEN
|| sk
->sk_state
== DCCP_OPEN
))
238 if (sk
->sk_state
== DCCP_PARTOPEN
) {
239 /* See 8.1.5. Handshake Completion */
240 inet_csk_schedule_ack(sk
);
241 inet_csk_reset_xmit_timer(sk
, ICSK_TIME_DACK
, inet_csk(sk
)->icsk_rto
, TCP_RTO_MAX
);
242 dcb
->dccpd_type
= DCCP_PKT_DATAACK
;
243 /* FIXME: we really should have a dccps_ack_pending or use icsk */
244 } else if (inet_csk_ack_scheduled(sk
) ||
245 (dp
->dccps_options
.dccpo_send_ack_vector
&&
246 ap
->dccpap_buf_ackno
!= DCCP_MAX_SEQNO
+ 1 &&
247 ap
->dccpap_ack_seqno
== DCCP_MAX_SEQNO
+ 1))
248 dcb
->dccpd_type
= DCCP_PKT_DATAACK
;
250 dcb
->dccpd_type
= DCCP_PKT_DATA
;
251 dccp_transmit_skb(sk
, skb
);
252 ccid_hc_tx_packet_sent(dp
->dccps_hc_tx_ccid
, sk
, 0, len
);
261 rc
= sk_stream_error(sk
, flags
, rc
);
264 rc
= sock_intr_errno(timeo
);
268 EXPORT_SYMBOL(dccp_sendmsg
);
270 int dccp_recvmsg(struct kiocb
*iocb
, struct sock
*sk
, struct msghdr
*msg
,
271 size_t len
, int nonblock
, int flags
, int *addr_len
)
273 const struct dccp_hdr
*dh
;
277 int target
; /* Read at least this many bytes */
283 if (sk
->sk_state
== DCCP_LISTEN
)
286 timeo
= sock_rcvtimeo(sk
, nonblock
);
288 /* Urgent data needs to be handled specially. */
294 seq
= &tp
->copied_seq
;
295 if (flags
& MSG_PEEK
) {
296 peek_seq
= tp
->copied_seq
;
301 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, len
);
309 /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
310 if (tp
->urg_data
&& tp
->urg_seq
== *seq
) {
313 if (signal_pending(current
)) {
314 copied
= timeo
? sock_intr_errno(timeo
) : -EAGAIN
;
320 /* Next get a buffer. */
322 skb
= skb_peek(&sk
->sk_receive_queue
);
330 if (dh
->dccph_type
== DCCP_PKT_DATA
||
331 dh
->dccph_type
== DCCP_PKT_DATAACK
)
334 if (dh
->dccph_type
== DCCP_PKT_RESET
||
335 dh
->dccph_type
== DCCP_PKT_CLOSE
) {
336 dccp_pr_debug("found fin ok!\n");
339 dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh
->dccph_type
));
340 BUG_TRAP(flags
& MSG_PEEK
);
342 } while (skb
!= (struct sk_buff
*)&sk
->sk_receive_queue
);
344 /* Well, if we have backlog, try to process it now yet. */
345 if (copied
>= target
&& !sk
->sk_backlog
.tail
)
350 sk
->sk_state
== DCCP_CLOSED
||
351 (sk
->sk_shutdown
& RCV_SHUTDOWN
) ||
353 signal_pending(current
) ||
357 if (sock_flag(sk
, SOCK_DONE
))
361 copied
= sock_error(sk
);
365 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
368 if (sk
->sk_state
== DCCP_CLOSED
) {
369 if (!sock_flag(sk
, SOCK_DONE
)) {
370 /* This occurs when user tries to read
371 * from never connected socket.
384 if (signal_pending(current
)) {
385 copied
= sock_intr_errno(timeo
);
390 /* FIXME: cleanup_rbuf(sk, copied); */
392 if (copied
>= target
) {
393 /* Do not sleep, just process backlog. */
397 sk_wait_data(sk
, &timeo
);
402 /* Ok so how much can we use? */
403 used
= skb
->len
- offset
;
407 if (!(flags
& MSG_TRUNC
)) {
408 err
= skb_copy_datagram_iovec(skb
, offset
,
411 /* Exception. Bailout! */
421 /* FIXME: tcp_rcv_space_adjust(sk); */
424 if (used
+ offset
< skb
->len
)
427 if (!(flags
& MSG_PEEK
))
431 if (!(flags
& MSG_PEEK
))
437 /* According to UNIX98, msg_name/msg_namelen are ignored
438 * on connected socket. I was just happy when found this 8) --ANK
441 /* Clean up data we have read: This will do ACK frames. */
442 /* FIXME: cleanup_rbuf(sk, copied); */
452 /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */
456 static int inet_dccp_listen(struct socket
*sock
, int backlog
)
458 struct sock
*sk
= sock
->sk
;
459 unsigned char old_state
;
465 if (sock
->state
!= SS_UNCONNECTED
|| sock
->type
!= SOCK_DCCP
)
468 old_state
= sk
->sk_state
;
469 if (!((1 << old_state
) & (DCCPF_CLOSED
| DCCPF_LISTEN
)))
472 /* Really, if the socket is already in listen state
473 * we can only allow the backlog to be adjusted.
475 if (old_state
!= DCCP_LISTEN
) {
477 * FIXME: here it probably should be sk->sk_prot->listen_start
478 * see tcp_listen_start
480 err
= dccp_listen_start(sk
);
484 sk
->sk_max_ack_backlog
= backlog
;
492 static const unsigned char dccp_new_state
[] = {
493 /* current state: new state: action: */
495 [DCCP_OPEN
] = DCCP_CLOSING
| DCCP_ACTION_FIN
,
496 [DCCP_REQUESTING
] = DCCP_CLOSED
,
497 [DCCP_PARTOPEN
] = DCCP_CLOSING
| DCCP_ACTION_FIN
,
498 [DCCP_LISTEN
] = DCCP_CLOSED
,
499 [DCCP_RESPOND
] = DCCP_CLOSED
,
500 [DCCP_CLOSING
] = DCCP_CLOSED
,
501 [DCCP_TIME_WAIT
] = DCCP_CLOSED
,
502 [DCCP_CLOSED
] = DCCP_CLOSED
,
505 static int dccp_close_state(struct sock
*sk
)
507 const int next
= dccp_new_state
[sk
->sk_state
];
508 const int ns
= next
& DCCP_STATE_MASK
;
510 if (ns
!= sk
->sk_state
)
511 dccp_set_state(sk
, ns
);
513 return next
& DCCP_ACTION_FIN
;
516 void dccp_close(struct sock
*sk
, long timeout
)
522 sk
->sk_shutdown
= SHUTDOWN_MASK
;
524 if (sk
->sk_state
== DCCP_LISTEN
) {
525 dccp_set_state(sk
, DCCP_CLOSED
);
528 inet_csk_listen_stop(sk
);
530 goto adjudge_to_death
;
534 * We need to flush the recv. buffs. We do this only on the
535 * descriptor close, not protocol-sourced closes, because the
536 *reader process may not have drained the data yet!
538 /* FIXME: check for unread data */
539 while ((skb
= __skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
543 if (sock_flag(sk
, SOCK_LINGER
) && !sk
->sk_lingertime
) {
544 /* Check zero linger _after_ checking for unread data. */
545 sk
->sk_prot
->disconnect(sk
, 0);
546 } else if (dccp_close_state(sk
)) {
550 sk_stream_wait_close(sk
, timeout
);
555 * Now socket is owned by kernel and we acquire BH lock
556 * to finish close. No need to check for user refs.
560 BUG_TRAP(!sock_owned_by_user(sk
));
565 if (sk
->sk_state
!= DCCP_CLOSED
)
566 dccp_set_state(sk
, DCCP_CLOSED
);
568 atomic_inc(&dccp_orphan_count
);
569 if (sk
->sk_state
== DCCP_CLOSED
)
570 inet_csk_destroy_sock(sk
);
572 /* Otherwise, socket is reprieved until protocol close. */
/*
 * shutdown(2) handler — stub: only logs entry, no half-close support yet.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
584 struct proto_ops inet_dccp_ops
= {
586 .owner
= THIS_MODULE
,
587 .release
= inet_release
,
589 .connect
= inet_stream_connect
,
590 .socketpair
= sock_no_socketpair
,
591 .accept
= inet_accept
,
592 .getname
= inet_getname
,
593 .poll
= sock_no_poll
,
595 .listen
= inet_dccp_listen
, /* FIXME: work on inet_listen to rename it to sock_common_listen */
596 .shutdown
= inet_shutdown
,
597 .setsockopt
= sock_common_setsockopt
,
598 .getsockopt
= sock_common_getsockopt
,
599 .sendmsg
= inet_sendmsg
,
600 .recvmsg
= sock_common_recvmsg
,
601 .mmap
= sock_no_mmap
,
602 .sendpage
= sock_no_sendpage
,
/* NOTE(review): defined in net/ipv4/af_inet.c; declared here directly,
 * presumably because no shared header exports it — confirm. */
extern struct net_proto_family inet_family_ops;
607 static struct inet_protosw dccp_v4_protosw
= {
609 .protocol
= IPPROTO_DCCP
,
610 .prot
= &dccp_v4_prot
,
611 .ops
= &inet_dccp_ops
,
/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;
/* Error message printed when the OOTB control socket cannot be created;
 * __initdata: discarded after module init. */
static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";
627 static int __init
dccp_ctl_sock_init(void)
629 int rc
= sock_create_kern(PF_INET
, SOCK_DCCP
, IPPROTO_DCCP
,
632 printk(dccp_ctl_socket_err_msg
);
634 dccp_ctl_socket
->sk
->sk_allocation
= GFP_ATOMIC
;
635 inet_sk(dccp_ctl_socket
->sk
)->uc_ttl
= -1;
637 /* Unhash it so that IP input processing does not even
638 * see it, we do not wish this socket to see incoming
641 dccp_ctl_socket
->sk
->sk_prot
->unhash(dccp_ctl_socket
->sk
);
647 static void __exit
dccp_ctl_sock_exit(void)
649 if (dccp_ctl_socket
!= NULL
)
650 sock_release(dccp_ctl_socket
);
653 static int __init
init_dccp_v4_mibs(void)
657 dccp_statistics
[0] = alloc_percpu(struct dccp_mib
);
658 if (dccp_statistics
[0] == NULL
)
661 dccp_statistics
[1] = alloc_percpu(struct dccp_mib
);
662 if (dccp_statistics
[1] == NULL
)
669 free_percpu(dccp_statistics
[0]);
670 dccp_statistics
[0] = NULL
;
/* Optional module parameter overriding the established-hash sizing
 * heuristic in dccp_init(); 0 means "size from available memory". */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
/* Debug-message toggle read by dccp_pr_debug(); the dccp_debug variable
 * itself is presumably defined under CONFIG_IP_DCCP_DEBUG — its
 * definition is not visible in this chunk, confirm against the tree. */
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
683 static int __init
dccp_init(void)
686 int ehash_order
, bhash_order
, i
;
687 int rc
= proto_register(&dccp_v4_prot
, 1);
692 dccp_hashinfo
.bind_bucket_cachep
= kmem_cache_create("dccp_bind_bucket",
693 sizeof(struct inet_bind_bucket
),
694 0, SLAB_HWCACHE_ALIGN
,
696 if (!dccp_hashinfo
.bind_bucket_cachep
)
697 goto out_proto_unregister
;
700 * Size and allocate the main established and bind bucket
703 * The methodology is similar to that of the buffer cache.
705 if (num_physpages
>= (128 * 1024))
706 goal
= num_physpages
>> (21 - PAGE_SHIFT
);
708 goal
= num_physpages
>> (23 - PAGE_SHIFT
);
711 goal
= (thash_entries
* sizeof(struct inet_ehash_bucket
)) >> PAGE_SHIFT
;
712 for (ehash_order
= 0; (1UL << ehash_order
) < goal
; ehash_order
++)
715 dccp_hashinfo
.ehash_size
= (1UL << ehash_order
) * PAGE_SIZE
/
716 sizeof(struct inet_ehash_bucket
);
717 dccp_hashinfo
.ehash_size
>>= 1;
718 while (dccp_hashinfo
.ehash_size
& (dccp_hashinfo
.ehash_size
- 1))
719 dccp_hashinfo
.ehash_size
--;
720 dccp_hashinfo
.ehash
= (struct inet_ehash_bucket
*)
721 __get_free_pages(GFP_ATOMIC
, ehash_order
);
722 } while (!dccp_hashinfo
.ehash
&& --ehash_order
> 0);
724 if (!dccp_hashinfo
.ehash
) {
725 printk(KERN_CRIT
"Failed to allocate DCCP "
726 "established hash table\n");
727 goto out_free_bind_bucket_cachep
;
730 for (i
= 0; i
< (dccp_hashinfo
.ehash_size
<< 1); i
++) {
731 rwlock_init(&dccp_hashinfo
.ehash
[i
].lock
);
732 INIT_HLIST_HEAD(&dccp_hashinfo
.ehash
[i
].chain
);
735 bhash_order
= ehash_order
;
738 dccp_hashinfo
.bhash_size
= (1UL << bhash_order
) * PAGE_SIZE
/
739 sizeof(struct inet_bind_hashbucket
);
740 if ((dccp_hashinfo
.bhash_size
> (64 * 1024)) && bhash_order
> 0)
742 dccp_hashinfo
.bhash
= (struct inet_bind_hashbucket
*)
743 __get_free_pages(GFP_ATOMIC
, bhash_order
);
744 } while (!dccp_hashinfo
.bhash
&& --bhash_order
>= 0);
746 if (!dccp_hashinfo
.bhash
) {
747 printk(KERN_CRIT
"Failed to allocate DCCP bind hash table\n");
748 goto out_free_dccp_ehash
;
751 for (i
= 0; i
< dccp_hashinfo
.bhash_size
; i
++) {
752 spin_lock_init(&dccp_hashinfo
.bhash
[i
].lock
);
753 INIT_HLIST_HEAD(&dccp_hashinfo
.bhash
[i
].chain
);
756 if (init_dccp_v4_mibs())
757 goto out_free_dccp_bhash
;
760 if (inet_add_protocol(&dccp_protocol
, IPPROTO_DCCP
))
761 goto out_free_dccp_v4_mibs
;
763 inet_register_protosw(&dccp_v4_protosw
);
765 rc
= dccp_ctl_sock_init();
767 goto out_unregister_protosw
;
770 out_unregister_protosw
:
771 inet_unregister_protosw(&dccp_v4_protosw
);
772 inet_del_protocol(&dccp_protocol
, IPPROTO_DCCP
);
773 out_free_dccp_v4_mibs
:
774 free_percpu(dccp_statistics
[0]);
775 free_percpu(dccp_statistics
[1]);
776 dccp_statistics
[0] = dccp_statistics
[1] = NULL
;
778 free_pages((unsigned long)dccp_hashinfo
.bhash
, bhash_order
);
779 dccp_hashinfo
.bhash
= NULL
;
781 free_pages((unsigned long)dccp_hashinfo
.ehash
, ehash_order
);
782 dccp_hashinfo
.ehash
= NULL
;
783 out_free_bind_bucket_cachep
:
784 kmem_cache_destroy(dccp_hashinfo
.bind_bucket_cachep
);
785 dccp_hashinfo
.bind_bucket_cachep
= NULL
;
786 out_proto_unregister
:
787 proto_unregister(&dccp_v4_prot
);
/* Printed if unhooking from the IPv4 stack fails at module unload;
 * __exitdata: only present when the exit path is compiled in. */
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";
794 static void __exit
dccp_fini(void)
796 dccp_ctl_sock_exit();
798 inet_unregister_protosw(&dccp_v4_protosw
);
800 if (inet_del_protocol(&dccp_protocol
, IPPROTO_DCCP
) < 0)
801 printk(dccp_del_proto_err_msg
);
803 /* Free the control endpoint. */
804 sock_release(dccp_ctl_socket
);
806 proto_unregister(&dccp_v4_prot
);
808 kmem_cache_destroy(dccp_hashinfo
.bind_bucket_cachep
);
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly.  Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");