/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/inet_sock.h>
#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
	.no_policy	= 1,
};
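
/*
 * Note: this net_protocol instance is only hooked into the IPv4 stack
 * when dccp_init() below registers it with inet_add_protocol() for
 * IPPROTO_DCCP.
 */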

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

static inline int dccp_listen_start(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/*
	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
	 * before calling listen()
	 */
	if (dccp_service_not_initialized(sk))
		return -EPROTO;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
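
/*
 * A minimal userspace sketch of the rule above; the service code value
 * and backlog are hypothetical:
 *
 *	u32 service = 42;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	listen(fd, 5);
 */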

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else { /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >=
				    sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
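
/*
 * Layout of optval as consumed below: the first u32 is the primary
 * service code (already fetched by dccp_setsockopt() as 'service');
 * any remaining u32s are copied into a dccp_service_list and stored
 * in dccps_service_list.
 */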

static int dccp_setsockopt_service(struct sock *sk, const u32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	struct dccp_sock *dp;
	int err;
	int val;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	dp = dccp_sk(sk);
	err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		dp->dccps_packet_size = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

static int dccp_getsockopt_service(struct sock *sk, int len,
				   u32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	if (get_user(len, optlen))
		return -EFAULT;

	if (len < sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		val = dp->dccps_packet_size;
		len = sizeof(dp->dccps_packet_size);
		break;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (u32 __user *)optval, optlen);
	case 128 ... 191: /* CCID-specific options, RX half-connection */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255: /* CCID-specific options, TX half-connection */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);
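
/*
 * DCCP is datagram-oriented: each dccp_sendmsg() call produces exactly
 * one packet, so writes larger than the current MSS are rejected with
 * -EMSGSIZE instead of being fragmented across packets.
 */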

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process() can
	 * work.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 * Current plan however is to _use_ sk_write_queue with
	 * an algorithm similar to tcp_sendmsg, where the main difference
	 * is that in DCCP we have to respect packet boundaries, so
	 * no coalescing of skbs.
	 *
	 * This bug was _quickly_ found & fixed by just looking at an OSTRA
	 * generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
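
/*
 * Reads are record-oriented as well: each call consumes at most one
 * data packet from the receive queue. A buffer shorter than the packet
 * sets MSG_TRUNC, and the unread remainder is discarded when the skb
 * is eaten (i.e. unless MSG_PEEK is set).
 */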

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when the user tries to read
				 * from a socket that was never connected.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
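
/*
 * Per-state close transitions: each entry packs the next state
 * (DCCP_STATE_MASK bits) together with an optional DCCP_ACTION_FIN
 * flag, which dccp_close_state() returns so that dccp_close() knows a
 * closing packet has to be sent.
 */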

static const unsigned char dccp_new_state[] = {
	/* current state:	 new state:	  action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}
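
/*
 * Close in outline: flush unread receive data, move towards DCCP_CLOSED
 * via dccp_close_state() (sending a closing packet if the table above
 * asks for one), wait up to 'timeout', then orphan the socket and let
 * the retransmit timer and protocol input finish the job in BH context.
 */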

void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL)
		__kfree_skb(skb);

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);

	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static const struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= INET_PROTOSW_ICSK,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
	int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
				  &dccp_ctl_socket);
	if (rc < 0)
		printk(dccp_ctl_socket_err_msg);
	else {
		dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
		inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

		/* Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
	}

	return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

static int __init init_dccp_v4_mibs(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}
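
/*
 * When set as a module parameter, thash_entries overrides the
 * memory-based sizing of the established hash table in dccp_init()
 * below.
 */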
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_prot, 1);

	if (rc)
		goto out;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;
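
	/*
	 * A note on the sizing below: 'goal' works out to roughly one
	 * page of hash table per 2MB of physical memory on larger
	 * machines (with 4KB pages, num_physpages >= 128 * 1024 means
	 * >= 512MB) and per 8MB on smaller ones, unless thash_entries
	 * overrides it.
	 */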

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					   sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	/* 2 * ehash_size buckets: the upper half is used for timewait
	 * sockets. */
	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					   sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	if (init_dccp_v4_mibs())
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_unregister_protosw;
out:
	return rc;
	/* Error unwind: the labels tear down in reverse order of setup. */
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_prot);
	goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_prot);
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly. Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP.
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");