Ok. I didn't make 2.4.0 in 2000. Tough. I tried, but we had some
[davej-history.git] / net / ipv4 / udp.c
bloba4ff40d56a9e0e51ca38e57b263f279c42c6bd04
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * The User Datagram Protocol (UDP).
8 * Version: $Id: udp.c,v 1.91 2000/11/28 13:38:38 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
15 * Fixes:
16 * Alan Cox : verify_area() calls
17 * Alan Cox : stopped close while in use off icmp
18 * messages. Not a fix but a botch that
19 * for udp at least is 'valid'.
20 * Alan Cox : Fixed icmp handling properly
21 * Alan Cox : Correct error for oversized datagrams
22 * Alan Cox : Tidied select() semantics.
23 * Alan Cox : udp_err() fixed properly, also now
24 * select and read wake correctly on errors
25 * Alan Cox : udp_send verify_area moved to avoid mem leak
26 * Alan Cox : UDP can count its memory
27 * Alan Cox : send to an unknown connection causes
28 * an ECONNREFUSED off the icmp, but
29 * does NOT close.
30 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
31 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
32 * bug no longer crashes it.
33 * Fred Van Kempen : Net2e support for sk->broadcast.
34 * Alan Cox : Uses skb_free_datagram
35 * Alan Cox : Added get/set sockopt support.
36 * Alan Cox : Broadcasting without option set returns EACCES.
37 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
38 * Alan Cox : Use ip_tos and ip_ttl
39 * Alan Cox : SNMP Mibs
40 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
41 * Matt Dillon : UDP length checks.
42 * Alan Cox : Smarter af_inet used properly.
43 * Alan Cox : Use new kernel side addressing.
44 * Alan Cox : Incorrect return on truncated datagram receive.
45 * Arnt Gulbrandsen : New udp_send and stuff
46 * Alan Cox : Cache last socket
47 * Alan Cox : Route cache
48 * Jon Peatfield : Minor efficiency fix to sendto().
49 * Mike Shaver : RFC1122 checks.
50 * Alan Cox : Nonblocking error fix.
51 * Willy Konynenberg : Transparent proxying support.
52 * Mike McLagan : Routing by source
53 * David S. Miller : New socket lookup architecture.
54 * Last socket cache retained as it
55 * does have a high hit rate.
56 * Olaf Kirch : Don't linearise iovec on sendmsg.
57 * Andi Kleen : Some cleanups, cache destination entry
58 * for connect.
59 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
60 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
61 * return ENOTCONN for unconnected sockets (POSIX)
62 * Janos Farkas : don't deliver multi/broadcasts to a different
63 * bound-to-device socket
66 * This program is free software; you can redistribute it and/or
67 * modify it under the terms of the GNU General Public License
68 * as published by the Free Software Foundation; either version
69 *		2 of the License, or (at your option) any later version.
 *
 */
72 /* RFC1122 Status:
73 4.1.3.1 (Ports):
74 SHOULD send ICMP_PORT_UNREACHABLE in response to datagrams to
75 an un-listened port. (OK)
76 4.1.3.2 (IP Options)
77 MUST pass IP options from IP -> application (OK)
78 MUST allow application to specify IP options (OK)
79 4.1.3.3 (ICMP Messages)
80 MUST pass ICMP error messages to application (OK -- except when SO_BSDCOMPAT is set)
81 4.1.3.4 (UDP Checksums)
82 MUST provide facility for checksumming (OK)
83 MAY allow application to control checksumming (OK)
84 MUST default to checksumming on (OK)
85 MUST discard silently datagrams with bad csums (OK, except during debugging)
86 4.1.3.5 (UDP Multihoming)
87 MUST allow application to specify source address (OK)
88 SHOULD be able to communicate the chosen src addr up to application
89 when application doesn't choose (DOES - use recvmsg cmsgs)
90 4.1.3.6 (Invalid Addresses)
91 MUST discard invalid source addresses (OK -- done in the new routing code)
92		  MUST only send datagrams with one of our addresses (OK)
*/
95 #include <asm/system.h>
96 #include <asm/uaccess.h>
97 #include <linux/types.h>
98 #include <linux/fcntl.h>
99 #include <linux/socket.h>
100 #include <linux/sockios.h>
101 #include <linux/in.h>
102 #include <linux/errno.h>
103 #include <linux/timer.h>
104 #include <linux/mm.h>
105 #include <linux/config.h>
106 #include <linux/inet.h>
107 #include <linux/netdevice.h>
108 #include <net/snmp.h>
109 #include <net/ip.h>
110 #include <net/protocol.h>
111 #include <linux/skbuff.h>
112 #include <net/sock.h>
113 #include <net/udp.h>
114 #include <net/icmp.h>
115 #include <net/route.h>
116 #include <net/inet_common.h>
117 #include <net/checksum.h>
120 * Snmp MIB for the UDP layer
123 struct udp_mib udp_statistics[NR_CPUS*2];
125 struct sock *udp_hash[UDP_HTABLE_SIZE];
126 rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
128 /* Shared by v4/v6 udp. */
129 int udp_port_rover;
131 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
133 write_lock_bh(&udp_hash_lock);
134 if (snum == 0) {
135 int best_size_so_far, best, result, i;
137 if (udp_port_rover > sysctl_local_port_range[1] ||
138 udp_port_rover < sysctl_local_port_range[0])
139 udp_port_rover = sysctl_local_port_range[0];
140 best_size_so_far = 32767;
141 best = result = udp_port_rover;
142 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
143 struct sock *sk;
144 int size;
146 sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
147 if (!sk) {
148 if (result > sysctl_local_port_range[1])
149 result = sysctl_local_port_range[0] +
150 ((result - sysctl_local_port_range[0]) &
151 (UDP_HTABLE_SIZE - 1));
152 goto gotit;
154 size = 0;
155 do {
156 if (++size >= best_size_so_far)
157 goto next;
158 } while ((sk = sk->next) != NULL);
159 best_size_so_far = size;
160 best = result;
161 next:
163 result = best;
164 for(;; result += UDP_HTABLE_SIZE) {
165 if (result > sysctl_local_port_range[1])
166 result = sysctl_local_port_range[0]
167 + ((result - sysctl_local_port_range[0]) &
168 (UDP_HTABLE_SIZE - 1));
169 if (!udp_lport_inuse(result))
170 break;
172 gotit:
173 udp_port_rover = snum = result;
174 } else {
175 struct sock *sk2;
177 for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
178 sk2 != NULL;
179 sk2 = sk2->next) {
180 if (sk2->num == snum &&
181 sk2 != sk &&
182 sk2->bound_dev_if == sk->bound_dev_if &&
183 (!sk2->rcv_saddr ||
184 !sk->rcv_saddr ||
185 sk2->rcv_saddr == sk->rcv_saddr) &&
186 (!sk2->reuse || !sk->reuse))
187 goto fail;
190 sk->num = snum;
191 if (sk->pprev == NULL) {
192 struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
193 if ((sk->next = *skp) != NULL)
194 (*skp)->pprev = &sk->next;
195 *skp = sk;
196 sk->pprev = skp;
197 sock_prot_inc_use(sk->prot);
198 sock_hold(sk);
200 write_unlock_bh(&udp_hash_lock);
201 return 0;
203 fail:
204 write_unlock_bh(&udp_hash_lock);
205 return 1;
/* UDP sockets are hashed at bind time via udp_v4_get_port(); reaching
 * this generic hash hook is a bug. */
static void udp_v4_hash(struct sock *sk)
{
	BUG();
}
213 static void udp_v4_unhash(struct sock *sk)
215 write_lock_bh(&udp_hash_lock);
216 if (sk->pprev) {
217 if (sk->next)
218 sk->next->pprev = sk->pprev;
219 *sk->pprev = sk->next;
220 sk->pprev = NULL;
221 sk->num = 0;
222 sock_prot_dec_use(sk->prot);
223 __sock_put(sk);
225 write_unlock_bh(&udp_hash_lock);
228 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
229 * harder than this. -DaveM
231 struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
233 struct sock *sk, *result = NULL;
234 unsigned short hnum = ntohs(dport);
235 int badness = -1;
237 for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
238 if(sk->num == hnum) {
239 int score = 0;
240 if(sk->rcv_saddr) {
241 if(sk->rcv_saddr != daddr)
242 continue;
243 score++;
245 if(sk->daddr) {
246 if(sk->daddr != saddr)
247 continue;
248 score++;
250 if(sk->dport) {
251 if(sk->dport != sport)
252 continue;
253 score++;
255 if(sk->bound_dev_if) {
256 if(sk->bound_dev_if != dif)
257 continue;
258 score++;
260 if(score == 4) {
261 result = sk;
262 break;
263 } else if(score > badness) {
264 result = sk;
265 badness = score;
269 return result;
272 __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
274 struct sock *sk;
276 read_lock(&udp_hash_lock);
277 sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
278 if (sk)
279 sock_hold(sk);
280 read_unlock(&udp_hash_lock);
281 return sk;
284 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
285 u16 loc_port, u32 loc_addr,
286 u16 rmt_port, u32 rmt_addr,
287 int dif)
289 struct sock *s = sk;
290 unsigned short hnum = ntohs(loc_port);
291 for(; s; s = s->next) {
292 if ((s->num != hnum) ||
293 (s->daddr && s->daddr!=rmt_addr) ||
294 (s->dport != rmt_port && s->dport != 0) ||
295 (s->rcv_saddr && s->rcv_saddr != loc_addr) ||
296 (s->bound_dev_if && s->bound_dev_if != dif))
297 continue;
298 break;
300 return s;
304 * This routine is called by the ICMP module when it gets some
305 * sort of error condition. If err < 0 then the socket should
306 * be closed and the error returned to the user. If err > 0
307 * it's just the icmp type << 8 | icmp code.
308 * Header points to the ip header of the error packet. We move
309 * on past this. Then (as it used to claim before adjustment)
310 * header points to the first 8 bytes of the udp header. We need
311 * to find the appropriate port.
314 void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
316 struct iphdr *iph = (struct iphdr*)dp;
317 struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
318 int type = skb->h.icmph->type;
319 int code = skb->h.icmph->code;
320 struct sock *sk;
321 int harderr;
322 u32 info;
323 int err;
325 if (len < (iph->ihl<<2)+sizeof(struct udphdr)) {
326 ICMP_INC_STATS_BH(IcmpInErrors);
327 return;
330 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
331 if (sk == NULL) {
332 ICMP_INC_STATS_BH(IcmpInErrors);
333 return; /* No socket for error */
336 err = 0;
337 info = 0;
338 harderr = 0;
340 switch (type) {
341 default:
342 case ICMP_TIME_EXCEEDED:
343 err = EHOSTUNREACH;
344 break;
345 case ICMP_SOURCE_QUENCH:
346 goto out;
347 case ICMP_PARAMETERPROB:
348 err = EPROTO;
349 info = ntohl(skb->h.icmph->un.gateway)>>24;
350 harderr = 1;
351 break;
352 case ICMP_DEST_UNREACH:
353 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
354 if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
355 err = EMSGSIZE;
356 info = ntohs(skb->h.icmph->un.frag.mtu);
357 harderr = 1;
358 break;
360 goto out;
362 err = EHOSTUNREACH;
363 if (code <= NR_ICMP_UNREACH) {
364 harderr = icmp_err_convert[code].fatal;
365 err = icmp_err_convert[code].errno;
367 break;
371 * RFC1122: OK. Passes ICMP errors back to application, as per
372 * 4.1.3.3.
374 if (!sk->protinfo.af_inet.recverr) {
375 if (!harderr || sk->state != TCP_ESTABLISHED)
376 goto out;
377 } else {
378 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
380 sk->err = err;
381 sk->error_report(sk);
382 out:
383 sock_put(sk);
387 static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
389 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
392 struct udpfakehdr
394 struct udphdr uh;
395 u32 saddr;
396 u32 daddr;
397 struct iovec *iov;
398 u32 wcheck;
402 * Copy and checksum a UDP packet from user space into a buffer. We still have
403 * to do the planning to get ip_build_xmit to spot direct transfer to network
404 * card and provide an additional callback mode for direct user->board I/O
405 * transfers. That one will be fun.
408 static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
410 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
411 if (offset==0) {
412 if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
413 fraglen-sizeof(struct udphdr), &ufh->wcheck))
414 return -EFAULT;
415 ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
416 ufh->wcheck);
417 ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
418 ntohs(ufh->uh.len),
419 IPPROTO_UDP, ufh->wcheck);
420 if (ufh->uh.check == 0)
421 ufh->uh.check = -1;
422 memcpy(to, ufh, sizeof(struct udphdr));
423 return 0;
425 if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
426 fraglen, &ufh->wcheck))
427 return -EFAULT;
428 return 0;
432 * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
433 * that we use two routines for this for speed. Probably we ought to have a
434 * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
435 * Timing needed to verify if this is a valid decision.
438 static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
440 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
442 if (offset==0) {
443 memcpy(to, ufh, sizeof(struct udphdr));
444 return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
445 fraglen-sizeof(struct udphdr));
447 return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
448 fraglen);
451 int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
453 int ulen = len + sizeof(struct udphdr);
454 struct ipcm_cookie ipc;
455 struct udpfakehdr ufh;
456 struct rtable *rt = NULL;
457 int free = 0;
458 int connected = 0;
459 u32 daddr;
460 u8 tos;
461 int err;
463 /* This check is ONLY to check for arithmetic overflow
464 on integer(!) len. Not more! Real check will be made
465 in ip_build_xmit --ANK
467 BTW socket.c -> af_*.c -> ... make multiple
468 invalid conversions size_t -> int. We MUST repair it f.e.
469 by replacing all of them with size_t and revise all
470 the places sort of len += sizeof(struct iphdr)
471 If len was ULONG_MAX-10 it would be cathastrophe --ANK
474 if (len < 0 || len > 0xFFFF)
475 return -EMSGSIZE;
478 * Check the flags.
481 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
482 return -EOPNOTSUPP;
485 * Get and verify the address.
488 if (msg->msg_name) {
489 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
490 if (msg->msg_namelen < sizeof(*usin))
491 return -EINVAL;
492 if (usin->sin_family != AF_INET) {
493 if (usin->sin_family != AF_UNSPEC)
494 return -EINVAL;
497 ufh.daddr = usin->sin_addr.s_addr;
498 ufh.uh.dest = usin->sin_port;
499 if (ufh.uh.dest == 0)
500 return -EINVAL;
501 } else {
502 if (sk->state != TCP_ESTABLISHED)
503 return -ENOTCONN;
504 ufh.daddr = sk->daddr;
505 ufh.uh.dest = sk->dport;
506 /* Open fast path for connected socket.
507 Route will not be used, if at least one option is set.
509 connected = 1;
511 ipc.addr = sk->saddr;
512 ufh.uh.source = sk->sport;
514 ipc.opt = NULL;
515 ipc.oif = sk->bound_dev_if;
516 if (msg->msg_controllen) {
517 err = ip_cmsg_send(msg, &ipc);
518 if (err)
519 return err;
520 if (ipc.opt)
521 free = 1;
522 connected = 0;
524 if (!ipc.opt)
525 ipc.opt = sk->protinfo.af_inet.opt;
527 ufh.saddr = ipc.addr;
528 ipc.addr = daddr = ufh.daddr;
530 if (ipc.opt && ipc.opt->srr) {
531 if (!daddr)
532 return -EINVAL;
533 daddr = ipc.opt->faddr;
534 connected = 0;
536 tos = RT_TOS(sk->protinfo.af_inet.tos);
537 if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
538 (ipc.opt && ipc.opt->is_strictroute)) {
539 tos |= RTO_ONLINK;
540 connected = 0;
543 if (MULTICAST(daddr)) {
544 if (!ipc.oif)
545 ipc.oif = sk->protinfo.af_inet.mc_index;
546 if (!ufh.saddr)
547 ufh.saddr = sk->protinfo.af_inet.mc_addr;
548 connected = 0;
551 if (connected)
552 rt = (struct rtable*)sk_dst_check(sk, 0);
554 if (rt == NULL) {
555 err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
556 if (err)
557 goto out;
559 err = -EACCES;
560 if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
561 goto out;
562 if (connected)
563 sk_dst_set(sk, dst_clone(&rt->u.dst));
566 if (msg->msg_flags&MSG_CONFIRM)
567 goto do_confirm;
568 back_from_confirm:
570 ufh.saddr = rt->rt_src;
571 if (!ipc.addr)
572 ufh.daddr = ipc.addr = rt->rt_dst;
573 ufh.uh.len = htons(ulen);
574 ufh.uh.check = 0;
575 ufh.iov = msg->msg_iov;
576 ufh.wcheck = 0;
578 /* RFC1122: OK. Provides the checksumming facility (MUST) as per */
579 /* 4.1.3.4. It's configurable by the application via setsockopt() */
580 /* (MAY) and it defaults to on (MUST). */
582 err = ip_build_xmit(sk,
583 (sk->no_check == UDP_CSUM_NOXMIT ?
584 udp_getfrag_nosum :
585 udp_getfrag),
586 &ufh, ulen, &ipc, rt, msg->msg_flags);
588 out:
589 ip_rt_put(rt);
590 if (free)
591 kfree(ipc.opt);
592 if (!err) {
593 UDP_INC_STATS_USER(UdpOutDatagrams);
594 return len;
596 return err;
598 do_confirm:
599 dst_confirm(&rt->u.dst);
600 if (!(msg->msg_flags&MSG_PROBE) || len)
601 goto back_from_confirm;
602 err = 0;
603 goto out;
607 * IOCTL requests applicable to the UDP protocol
610 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
612 switch(cmd)
614 case SIOCOUTQ:
616 int amount = atomic_read(&sk->wmem_alloc);
617 return put_user(amount, (int *)arg);
620 case SIOCINQ:
622 struct sk_buff *skb;
623 unsigned long amount;
625 amount = 0;
626 spin_lock_irq(&sk->receive_queue.lock);
627 skb = skb_peek(&sk->receive_queue);
628 if (skb != NULL) {
630 * We will only return the amount
631 * of this packet since that is all
632 * that will be read.
634 amount = skb->len - sizeof(struct udphdr);
636 spin_unlock_irq(&sk->receive_queue.lock);
637 return put_user(amount, (int *)arg);
640 default:
641 return -ENOIOCTLCMD;
643 return(0);
646 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
648 return (unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum));
651 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
653 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
654 __udp_checksum_complete(skb);
658 * This should be easy, if there is something there we
659 * return it, otherwise we block.
662 int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
663 int noblock, int flags, int *addr_len)
665 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
666 struct sk_buff *skb;
667 int copied, err;
670 * Check any passed addresses
672 if (addr_len)
673 *addr_len=sizeof(*sin);
675 if (flags & MSG_ERRQUEUE)
676 return ip_recv_error(sk, msg, len);
679 * From here the generic datagram does a lot of the work. Come
680 * the finished NET3, it will do _ALL_ the work!
683 skb = skb_recv_datagram(sk, flags, noblock, &err);
684 if (!skb)
685 goto out;
687 copied = skb->len - sizeof(struct udphdr);
688 if (copied > len) {
689 copied = len;
690 msg->msg_flags |= MSG_TRUNC;
693 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
694 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
695 copied);
696 } else if (msg->msg_flags&MSG_TRUNC) {
697 if (__udp_checksum_complete(skb))
698 goto csum_copy_err;
699 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
700 copied);
701 } else {
702 err = copy_and_csum_toiovec(msg->msg_iov, skb, sizeof(struct udphdr));
704 if (err)
705 goto csum_copy_err;
708 if (err)
709 goto out_free;
711 sock_recv_timestamp(msg, sk, skb);
713 /* Copy the address. */
714 if (sin)
716 sin->sin_family = AF_INET;
717 sin->sin_port = skb->h.uh->source;
718 sin->sin_addr.s_addr = skb->nh.iph->saddr;
719 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
721 if (sk->protinfo.af_inet.cmsg_flags)
722 ip_cmsg_recv(msg, skb);
723 err = copied;
725 out_free:
726 skb_free_datagram(sk, skb);
727 out:
728 return err;
730 csum_copy_err:
731 UDP_INC_STATS_BH(UdpInErrors);
733 /* Clear queue. */
734 if (flags&MSG_PEEK) {
735 int clear = 0;
736 spin_lock_irq(&sk->receive_queue.lock);
737 if (skb == skb_peek(&sk->receive_queue)) {
738 __skb_unlink(skb, &sk->receive_queue);
739 clear = 1;
741 spin_unlock_irq(&sk->receive_queue.lock);
742 if (clear)
743 kfree_skb(skb);
746 skb_free_datagram(sk, skb);
748 return -EAGAIN;
751 int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
753 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
754 struct rtable *rt;
755 int err;
758 if (addr_len < sizeof(*usin))
759 return -EINVAL;
761 if (usin->sin_family != AF_INET)
762 return -EAFNOSUPPORT;
764 sk_dst_reset(sk);
766 err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
767 sk->protinfo.af_inet.tos|sk->localroute, sk->bound_dev_if);
768 if (err)
769 return err;
770 if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
771 ip_rt_put(rt);
772 return -EACCES;
774 if(!sk->saddr)
775 sk->saddr = rt->rt_src; /* Update source address */
776 if(!sk->rcv_saddr)
777 sk->rcv_saddr = rt->rt_src;
778 sk->daddr = rt->rt_dst;
779 sk->dport = usin->sin_port;
780 sk->state = TCP_ESTABLISHED;
782 sk_dst_set(sk, &rt->u.dst);
783 return(0);
786 int udp_disconnect(struct sock *sk, int flags)
789 * 1003.1g - break association.
792 sk->state = TCP_CLOSE;
793 sk->daddr = 0;
794 sk->dport = 0;
795 sk->bound_dev_if = 0;
796 if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
797 sk->rcv_saddr = 0;
798 sk->saddr = 0;
799 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
800 memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
801 memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
802 #endif
804 if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
805 sk->prot->unhash(sk);
806 sk->sport = 0;
808 sk_dst_reset(sk);
809 return 0;
/* UDP has no shutdown handshake; closing just releases the socket.
 * The timeout argument is unused for a connectionless protocol. */
static void udp_close(struct sock *sk, long timeout)
{
	inet_sock_release(sk);
}
817 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
820 * Charge it to the socket, dropping if the queue is full.
823 #if defined(CONFIG_FILTER)
824 if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
825 if (__udp_checksum_complete(skb)) {
826 UDP_INC_STATS_BH(UdpInErrors);
827 IP_INC_STATS_BH(IpInDiscards);
828 ip_statistics[smp_processor_id()*2].IpInDelivers--;
829 kfree_skb(skb);
830 return -1;
832 skb->ip_summed = CHECKSUM_UNNECESSARY;
834 #endif
836 if (sock_queue_rcv_skb(sk,skb)<0) {
837 UDP_INC_STATS_BH(UdpInErrors);
838 IP_INC_STATS_BH(IpInDiscards);
839 ip_statistics[smp_processor_id()*2].IpInDelivers--;
840 kfree_skb(skb);
841 return -1;
843 UDP_INC_STATS_BH(UdpInDatagrams);
844 return 0;
848 * Multicasts and broadcasts go to each listener.
850 * Note: called only from the BH handler context,
851 * so we don't need to lock the hashes.
853 static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
854 u32 saddr, u32 daddr)
856 struct sock *sk;
857 int dif;
859 read_lock(&udp_hash_lock);
860 sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
861 dif = skb->dev->ifindex;
862 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
863 if (sk) {
864 struct sock *sknext = NULL;
866 do {
867 struct sk_buff *skb1 = skb;
869 sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
870 uh->source, saddr, dif);
871 if(sknext)
872 skb1 = skb_clone(skb, GFP_ATOMIC);
874 if(skb1)
875 udp_queue_rcv_skb(sk, skb1);
876 sk = sknext;
877 } while(sknext);
878 } else
879 kfree_skb(skb);
880 read_unlock(&udp_hash_lock);
881 return 0;
884 /* Initialize UDP checksum. If exited with zero value (success),
885 * CHECKSUM_UNNECESSARY means, that no more checks are required.
886 * Otherwise, csum completion requires chacksumming packet body,
887 * including udp header and folding it to skb->csum.
889 static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
890 unsigned short ulen, u32 saddr, u32 daddr)
892 if (uh->check == 0) {
893 skb->ip_summed = CHECKSUM_UNNECESSARY;
894 } else if (skb->ip_summed == CHECKSUM_HW) {
895 if (udp_check(uh, ulen, saddr, daddr, skb->csum))
896 return -1;
897 skb->ip_summed = CHECKSUM_UNNECESSARY;
898 } else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
899 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
900 /* Probably, we should checksum udp header (it should be in cache
901 * in any case) and data in tiny packets (< rx copybreak).
903 return 0;
907 * All we need to do is get the socket, and then do a checksum.
910 int udp_rcv(struct sk_buff *skb, unsigned short len)
912 struct sock *sk;
913 struct udphdr *uh;
914 unsigned short ulen;
915 struct rtable *rt = (struct rtable*)skb->dst;
916 u32 saddr = skb->nh.iph->saddr;
917 u32 daddr = skb->nh.iph->daddr;
920 * Get the header.
923 uh = skb->h.uh;
924 __skb_pull(skb, skb->h.raw - skb->data);
926 IP_INC_STATS_BH(IpInDelivers);
929 * Validate the packet and the UDP length.
932 ulen = ntohs(uh->len);
934 if (ulen > len || ulen < sizeof(*uh)) {
935 NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
936 UDP_INC_STATS_BH(UdpInErrors);
937 kfree_skb(skb);
938 return(0);
940 skb_trim(skb, ulen);
942 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
943 goto csum_error;
945 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
946 return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
948 sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
950 if (sk != NULL) {
951 udp_queue_rcv_skb(sk, skb);
952 sock_put(sk);
953 return 0;
956 /* No socket. Drop packet silently, if checksum is wrong */
957 if (udp_checksum_complete(skb))
958 goto csum_error;
960 UDP_INC_STATS_BH(UdpNoPorts);
961 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
964 * Hmm. We got an UDP packet to a port to which we
965 * don't wanna listen. Ignore it.
967 kfree_skb(skb);
968 return(0);
970 csum_error:
972 * RFC1122: OK. Discards the bad packet silently (as far as
973 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
975 NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
976 NIPQUAD(saddr),
977 ntohs(uh->source),
978 NIPQUAD(daddr),
979 ntohs(uh->dest),
980 ulen));
981 UDP_INC_STATS_BH(UdpInErrors);
982 kfree_skb(skb);
983 return(0);
986 static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
988 unsigned int dest, src;
989 __u16 destp, srcp;
991 dest = sp->daddr;
992 src = sp->rcv_saddr;
993 destp = ntohs(sp->dport);
994 srcp = ntohs(sp->sport);
995 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
996 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
997 i, src, srcp, dest, destp, sp->state,
998 atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
999 0, 0L, 0,
1000 sock_i_uid(sp), 0,
1001 sock_i_ino(sp),
1002 atomic_read(&sp->refcnt), sp);
1005 int udp_get_info(char *buffer, char **start, off_t offset, int length)
1007 int len = 0, num = 0, i;
1008 off_t pos = 0;
1009 off_t begin;
1010 char tmpbuf[129];
1012 if (offset < 128)
1013 len += sprintf(buffer, "%-127s\n",
1014 " sl local_address rem_address st tx_queue "
1015 "rx_queue tr tm->when retrnsmt uid timeout inode");
1016 pos = 128;
1017 read_lock(&udp_hash_lock);
1018 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
1019 struct sock *sk;
1021 for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
1022 if (sk->family != PF_INET)
1023 continue;
1024 pos += 128;
1025 if (pos <= offset)
1026 continue;
1027 get_udp_sock(sk, tmpbuf, i);
1028 len += sprintf(buffer+len, "%-127s\n", tmpbuf);
1029 if(len >= length)
1030 goto out;
1033 out:
1034 read_unlock(&udp_hash_lock);
1035 begin = len - (pos - offset);
1036 *start = buffer + begin;
1037 len -= begin;
1038 if(len > length)
1039 len = length;
1040 if (len < 0)
1041 len = 0;
1042 return len;
1045 struct proto udp_prot = {
1046 name: "UDP",
1047 close: udp_close,
1048 connect: udp_connect,
1049 disconnect: udp_disconnect,
1050 ioctl: udp_ioctl,
1051 setsockopt: ip_setsockopt,
1052 getsockopt: ip_getsockopt,
1053 sendmsg: udp_sendmsg,
1054 recvmsg: udp_recvmsg,
1055 backlog_rcv: udp_queue_rcv_skb,
1056 hash: udp_v4_hash,
1057 unhash: udp_v4_unhash,
1058 get_port: udp_v4_get_port,