/*
 *	INET		An implementation of the TCP/IP protocol suite for the LINUX
 *			operating system.  INET is implemented using the  BSD Socket
 *			interface as the means of communication with the user level.
 *
 *			The User Datagram Protocol (UDP).
 *
 * Version:	$Id: udp.c,v 1.66 1999/05/08 20:00:25 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Alan Cox, <Alan.Cox@linux.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() calls
 *		Alan Cox	:	stopped close while in use off icmp
 *					messages. Not a fix but a botch that
 *					for udp at least is 'valid'.
 *		Alan Cox	:	Fixed icmp handling properly
 *		Alan Cox	:	Correct error for oversized datagrams
 *		Alan Cox	:	Tidied select() semantics.
 *		Alan Cox	:	udp_err() fixed properly, also now
 *					select and read wake correctly on errors
 *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
 *		Alan Cox	:	UDP can count its memory
 *		Alan Cox	:	send to an unknown connection causes
 *					an ECONNREFUSED off the icmp, but
 *					does NOT close.
 *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
 *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
 *					bug no longer crashes it.
 *		Fred Van Kempen	:	Net2e support for sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram
 *		Alan Cox	:	Added get/set sockopt support.
 *		Alan Cox	:	Broadcasting without option set returns EACCES.
 *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
 *		Alan Cox	:	Use ip_tos and ip_ttl
 *		Alan Cox	:	SNMP Mibs
 *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
 *		Matt Dillon	:	UDP length checks.
 *		Alan Cox	:	Smarter af_inet used properly.
 *		Alan Cox	:	Use new kernel side addressing.
 *		Alan Cox	:	Incorrect return on truncated datagram receive.
 *		Arnt Gulbrandsen:	New udp_send and stuff
 *		Alan Cox	:	Cache last socket
 *		Alan Cox	:	Route cache
 *		Jon Peatfield	:	Minor efficiency fix to sendto().
 *		Mike Shaver	:	RFC1122 checks.
 *		Alan Cox	:	Nonblocking error fix.
 *		Willy Konynenberg:	Transparent proxying support.
 *		Mike McLagan	:	Routing by source
 *		David S. Miller	:	New socket lookup architecture.
 *					Last socket cache retained as it
 *					does have a high hit rate.
 *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
 *		Andi Kleen	:	Some cleanups, cache destination entry
 *					for connect.
 *		Vitaly E. Lavrov:	Transparent proxy revived after year coma.
 *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
 *					return ENOTCONN for unconnected sockets (POSIX)
 *		Janos Farkas	:	don't deliver multi/broadcasts to a different
 *					bound-to-device socket
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
/* RFC1122 Status:
   4.1.3.1 (Ports):
     SHOULD send ICMP_PORT_UNREACHABLE in response to datagrams to
       an un-listened port. (OK)
   4.1.3.2 (IP Options)
     MUST pass IP options from IP -> application (OK)
     MUST allow application to specify IP options (OK)
   4.1.3.3 (ICMP Messages)
     MUST pass ICMP error messages to application (OK -- except when SO_BSDCOMPAT is set)
   4.1.3.4 (UDP Checksums)
     MUST provide facility for checksumming (OK)
     MAY allow application to control checksumming (OK)
     MUST default to checksumming on (OK)
     MUST discard silently datagrams with bad csums (OK, except during debugging)
   4.1.3.5 (UDP Multihoming)
     MUST allow application to specify source address (OK)
     SHOULD be able to communicate the chosen src addr up to application
       when application doesn't choose (DOES - use recvmsg cmsgs)
   4.1.3.6 (Invalid Addresses)
     MUST discard invalid source addresses (OK -- done in the new routing code)
     MUST only send datagrams with one of our addresses (OK)
*/
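
/*
 * Illustrative note on the 4.1.3.4 "MAY allow application to control
 * checksumming" item: the knob is the sk->no_check flag tested in
 * udp_sendmsg() below.  A userspace sketch, assuming the standard
 * SOL_SOCKET SO_NO_CHECK option (not shown in this file):
 *
 *	int one = 1;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	setsockopt(fd, SOL_SOCKET, SO_NO_CHECK, &one, sizeof(one));
 *	// datagrams sent on fd now go out with a zero UDP checksum
 */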
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/config.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/udp.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/checksum.h>
/*
 *	Snmp MIB for the UDP layer
 */

struct udp_mib udp_statistics;

struct sock *udp_hash[UDP_HTABLE_SIZE];
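
/*
 * Bind conflict check: returns nonzero if another socket already owns
 * local port 'snum' in a way that conflicts with 'sk'.  Sockets bound
 * to different devices never conflict.  Otherwise a conflict exists
 * when a wildcard local address is involved, or when both sockets are
 * bound to the same local address, unless both sockets have reuse set
 * and the existing one is not listening.
 */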
static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
{
	struct sock *sk2;
	int retval = 0, sk_reuse = sk->reuse;

	SOCKHASH_LOCK_READ();
	for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
		if((sk2->num == snum) && (sk2 != sk)) {
			unsigned char state = sk2->state;
			int sk2_reuse = sk2->reuse;

			/* Two sockets can be bound to the same port if they're
			 * bound to different interfaces.
			 */
			if(sk2->bound_dev_if != sk->bound_dev_if)
				continue;

			if(!sk2->rcv_saddr || !sk->rcv_saddr) {
				if((!sk2_reuse)			||
				   (!sk_reuse)			||
				   (state == TCP_LISTEN)) {
					retval = 1;
					break;
				}
			} else if(sk2->rcv_saddr == sk->rcv_saddr) {
				if((!sk_reuse)			||
				   (!sk2_reuse)			||
				   (state == TCP_LISTEN)) {
					retval = 1;
					break;
				}
			}
		}
	}
	SOCKHASH_UNLOCK_READ();
	return retval;
}
static inline int udp_lport_inuse(u16 num)
{
	struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];

	for(; sk != NULL; sk = sk->next) {
		if(sk->num == num)
			return 1;
	}
	return 0;
}
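
/*
 * Pick a local port for an unbound socket.  Scan one candidate port per
 * hash bucket starting at 'start', remembering the bucket with the
 * shortest chain; an empty bucket is taken immediately.  Otherwise step
 * through the best bucket's ports (the UDP_HTABLE_SIZE stride keeps the
 * hash bucket fixed) until a free port is found, wrapping back into the
 * configured local port range as needed.
 */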
/* Shared by v4/v6 tcp. */
unsigned short udp_good_socknum(void)
{
	int result;
	static int start = 0;
	int i, best, best_size_so_far;

	SOCKHASH_LOCK_READ();
	if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
		start = sysctl_local_port_range[0];

	best_size_so_far = 32767;	/* "big" num */
	best = result = start;

	for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
		struct sock *sk;
		int size;

		sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
		if(!sk) {
			if (result > sysctl_local_port_range[1])
				result = sysctl_local_port_range[0]
					+ ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
			goto out;
		}

		/* Is this one better than our best so far? */
		size = 0;
		do {
			if(++size >= best_size_so_far)
				goto next;
		} while((sk = sk->next) != NULL);
		best_size_so_far = size;
		best = result;
	next:
		;
	}

	result = best;

	for(;; result += UDP_HTABLE_SIZE) {
		/* Get into range (but preserve hash bin)... */
		if (result > sysctl_local_port_range[1])
			result = sysctl_local_port_range[0]
				+ ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
		if (!udp_lport_inuse(result))
			break;
	}
out:
	start = result;
	SOCKHASH_UNLOCK_READ();
	return result;
}
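
/*
 * Hash table maintenance.  Writers take the socket hash lock so
 * concurrent lookups see a consistent chain; udp_v4_rehash() moves a
 * socket from the chain recorded in sk->hashent to the chain selected
 * by its current local port.
 */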
static void udp_v4_hash(struct sock *sk)
{
	struct sock **skp;
	int num = sk->num;

	num &= (UDP_HTABLE_SIZE - 1);
	skp = &udp_hash[num];

	SOCKHASH_LOCK_WRITE();
	sk->next = *skp;
	*skp = sk;
	sk->hashent = num;
	SOCKHASH_UNLOCK_WRITE();
}
static void udp_v4_unhash(struct sock *sk)
{
	struct sock **skp;
	int num = sk->num;

	num &= (UDP_HTABLE_SIZE - 1);
	skp = &udp_hash[num];

	SOCKHASH_LOCK_WRITE();
	while(*skp != NULL) {
		if(*skp == sk) {
			*skp = sk->next;
			break;
		}
		skp = &((*skp)->next);
	}
	SOCKHASH_UNLOCK_WRITE();
}
static void udp_v4_rehash(struct sock *sk)
{
	struct sock **skp;
	int num = sk->num;
	int oldnum = sk->hashent;

	num &= (UDP_HTABLE_SIZE - 1);
	skp = &udp_hash[oldnum];

	SOCKHASH_LOCK_WRITE();
	while(*skp != NULL) {
		if(*skp == sk) {
			*skp = sk->next;
			break;
		}
		skp = &((*skp)->next);
	}
	sk->next = udp_hash[num];
	udp_hash[num] = sk;
	sk->hashent = num;
	SOCKHASH_UNLOCK_WRITE();
}
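
/*
 * Socket lookup for incoming packets works by scoring: each of a bound
 * local address, connected remote address, connected remote port and
 * bound device that is set on a candidate socket must match the packet
 * and earns one point.  A fully specified (4 point) match wins
 * outright; otherwise the best-scoring socket seen is returned.
 */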
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 * harder than this. -DaveM
 */
struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
	struct sock *sk, *result = NULL;
	unsigned short hnum = ntohs(dport);
	int badness = -1;

	for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
		if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) {
			int score = 0;
			if(sk->rcv_saddr) {
				if(sk->rcv_saddr != daddr)
					continue;
				score++;
			}
			if(sk->daddr) {
				if(sk->daddr != saddr)
					continue;
				score++;
			}
			if(sk->dport) {
				if(sk->dport != sport)
					continue;
				score++;
			}
			if(sk->bound_dev_if) {
				if(sk->bound_dev_if != dif)
					continue;
				score++;
			}
			if(score == 4) {
				result = sk;
				break;
			} else if(score > badness) {
				result = sk;
				badness = score;
			}
		}
	}
	return result;
}
__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
	struct sock *sk;

	SOCKHASH_LOCK_READ();
	sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
	SOCKHASH_UNLOCK_READ();
	return sk;
}
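
/*
 * Transparent proxy lookup: in addition to sockets bound to the
 * packet's real destination port, a second pass walks the chain for the
 * redirect port.  Scoring works as above, but a perfect match on the
 * real destination port is preferred over redirect-port candidates.
 */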
#ifdef CONFIG_IP_TRANSPARENT_PROXY
#define secondlist(hpnum, sk, fpass) \
({	struct sock *s1; if(!(sk) && (fpass)--) \
		s1 = udp_hash[(hpnum) & (UDP_HTABLE_SIZE - 1)]; \
	else \
		s1 = (sk); \
	s1; \
})

#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \
	secondlist((hpnum), udp_hash[(hnum)&(UDP_HTABLE_SIZE-1)],(fpass))

#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
	secondlist((hpnum),(sk)->next,(fpass))

static struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
					unsigned short rnum, unsigned long laddr,
					struct device *dev, unsigned short pnum,
					int dif)
{
	struct sock *s, *result = NULL;
	int badness = -1;
	u32 paddr = 0;
	unsigned short hnum = ntohs(num);
	unsigned short hpnum = ntohs(pnum);
	int firstpass = 1;

	if(dev && dev->ip_ptr) {
		struct in_device *idev = dev->ip_ptr;

		if(idev->ifa_list)
			paddr = idev->ifa_list->ifa_local;
	}

	SOCKHASH_LOCK_READ();
	for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
	    s != NULL;
	    s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) {
		if(s->num == hnum || s->num == hpnum) {
			int score = 0;
			if(s->dead && (s->state == TCP_CLOSE))
				continue;
			if(s->rcv_saddr) {
				if((s->num != hpnum || s->rcv_saddr != paddr) &&
				   (s->num != hnum || s->rcv_saddr != laddr))
					continue;
				score++;
			}
			if(s->daddr) {
				if(s->daddr != raddr)
					continue;
				score++;
			}
			if(s->dport) {
				if(s->dport != rnum)
					continue;
				score++;
			}
			if(s->bound_dev_if) {
				if(s->bound_dev_if != dif)
					continue;
				score++;
			}
			if(score == 4 && s->num == hnum) {
				result = s;
				break;
			} else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
				result = s;
				badness = score;
			}
		}
	}
	SOCKHASH_UNLOCK_READ();
	return result;
}

#undef secondlist
#undef udp_v4_proxy_loop_init
#undef udp_v4_proxy_loop_next

#endif
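
/*
 * Return the first socket in the chain starting at 'sk' that is willing
 * to take a multicast/broadcast datagram sent from raddr:rnum to
 * laddr on local port 'num', arriving on interface 'dif'.
 */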
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
					     unsigned short num,
					     unsigned long raddr,
					     unsigned short rnum,
					     unsigned long laddr,
					     int dif)
{
	struct sock *s = sk;
	unsigned short hnum = ntohs(num);
	for(; s; s = s->next) {
		if ((s->num != hnum)				||
		    (s->dead && (s->state == TCP_CLOSE))	||
		    (s->daddr && s->daddr != raddr)		||
		    (s->dport != rnum && s->dport != 0)		||
		    (s->rcv_saddr && s->rcv_saddr != laddr)	||
		    (s->bound_dev_if && s->bound_dev_if != dif))
			continue;
		break;
	}
	return s;
}
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.
 * Header points to the ip header of the error packet. We move
 * on past this. Then (as it used to claim before adjustment)
 * header points to the first 8 bytes of the udp header.  We need
 * to find the appropriate port.
 */
void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
{
	struct iphdr *iph = (struct iphdr*)dp;
	struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	int harderr;
	u32 info;
	int err;

	if (len < (iph->ihl<<2)+sizeof(struct udphdr)) {
		icmp_statistics.IcmpInErrors++;
		return;
	}

	sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
	if (sk == NULL) {
		icmp_statistics.IcmpInErrors++;
		return;	/* No socket for error */
	}

	err = 0;
	info = 0;
	harderr = 0;

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		return;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		info = ntohl(skb->h.icmph->un.gateway)>>24;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				info = ntohs(skb->h.icmph->un.frag.mtu);
				harderr = 1;
				break;
			}
			return;
		}
		err = EHOSTUNREACH;
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	}

	/*
	 *	Various people wanted BSD UDP semantics. Well they've come
	 *	back out because they slow down response to stuff like dead
	 *	or unreachable name servers and they screw term users something
	 *	chronic. Oh and it violates RFC1122. So basically fix your
	 *	client code people.
	 */

	/*
	 *	RFC1122: OK.  Passes ICMP errors back to application, as per
	 *	4.1.3.3. After the comment above, that should be no surprise.
	 */
	if (!harderr && !sk->ip_recverr)
		return;

	/*
	 *	4.x BSD compatibility item. Break RFC1122 to
	 *	get BSD socket semantics.
	 */
	if(sk->bsdism && sk->state!=TCP_ESTABLISHED)
		return;

	if (sk->ip_recverr)
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
	sk->err = err;
	sk->error_report(sk);
}
static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
{
	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
}
struct udpfakehdr
{
	struct udphdr uh;
	u32 saddr;
	u32 daddr;
	struct iovec *iov;
	u32 wcheck;
};
/*
 *	Copy and checksum a UDP packet from user space into a buffer. We still have
 *	to do the planning to get ip_build_xmit to spot direct transfer to network
 *	card and provide an additional callback mode for direct user->board I/O
 *	transfers. That one will be fun.
 */
static int udp_getfrag(const void *p, char *to, unsigned int offset, unsigned int fraglen)
{
	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
	if (offset==0) {
		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
			return -EFAULT;
		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
					   ufh->wcheck);
		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
						  ntohs(ufh->uh.len),
						  IPPROTO_UDP, ufh->wcheck);
		if (ufh->uh.check == 0)
			ufh->uh.check = -1;
		memcpy(to, ufh, sizeof(struct udphdr));
		return 0;
	}
	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
					   fraglen, &ufh->wcheck))
		return -EFAULT;
	return 0;
}
/*
 *	Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
 *	that we use two routines for this for speed. Probably we ought to have a
 *	CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
 *	Timing needed to verify if this is a valid decision.
 */
static int udp_getfrag_nosum(const void *p, char *to, unsigned int offset, unsigned int fraglen)
{
	struct udpfakehdr *ufh = (struct udpfakehdr *)p;

	if (offset==0) {
		memcpy(to, ufh, sizeof(struct udphdr));
		return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
					   fraglen-sizeof(struct udphdr));
	}
	return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
				   fraglen);
}
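
/*
 * Usage sketch (illustrative, from userspace): a connected UDP socket
 * lets udp_sendmsg() below take the cached-route fast path, while a
 * plain sendto() on an unconnected socket resolves a route per call:
 *
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *	send(fd, buf, len, 0);			// reuses sk->dst_cache
 *	sendto(fd2, buf, len, 0, ...);		// goes through ip_route_output()
 */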
int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	int ulen = len + sizeof(struct udphdr);
	struct ipcm_cookie ipc;
	struct udpfakehdr ufh;
	struct rtable *rt = NULL;
	int free = 0;
	int connected = 0;
	u32 daddr;
	u8 tos;
	int err;

	/* This check is ONLY to check for arithmetic overflow
	   on integer(!) len. Not more! Real check will be made
	   in ip_build_xmit --ANK

	   BTW socket.c -> af_*.c -> ... make multiple
	   invalid conversions size_t -> int. We MUST repair it f.e.
	   by replacing all of them with size_t and revise all
	   the places sort of len += sizeof(struct iphdr)
	   If len was ULONG_MAX-10 it would be catastrophe  --ANK
	 */
	if (len < 0 || len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

#ifdef CONFIG_IP_TRANSPARENT_PROXY
	if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_PROXY|MSG_NOSIGNAL))
		return -EINVAL;
	if ((msg->msg_flags&MSG_PROXY) && !capable(CAP_NET_ADMIN))
		return -EPERM;
#else
	if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL))
		return -EINVAL;
#endif

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_name) {
		struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;
		if (msg->msg_namelen < sizeof(*usin))
			return(-EINVAL);
		if (usin->sin_family != AF_INET) {
			static int complained;
			if (!complained++)
				printk(KERN_WARNING "%s forgot to set AF_INET in udp sendmsg. Fix it!\n", current->comm);
			if (usin->sin_family)
				return -EINVAL;
		}
		ufh.daddr = usin->sin_addr.s_addr;
		ufh.uh.dest = usin->sin_port;
		if (ufh.uh.dest == 0)
			return -EINVAL;
	} else {
		if (sk->state != TCP_ESTABLISHED)
			return -ENOTCONN;
		ufh.daddr = sk->daddr;
		ufh.uh.dest = sk->dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
	}
#ifdef CONFIG_IP_TRANSPARENT_PROXY
	if (msg->msg_flags&MSG_PROXY) {
		/*
		 * We map the first 8 bytes of a second sockaddr_in
		 * into the last 8 (unused) bytes of a sockaddr_in.
		 */
		struct sockaddr_in *from = (struct sockaddr_in *)msg->msg_name;
		from = (struct sockaddr_in *)&from->sin_zero;
		if (from->sin_family != AF_INET)
			return -EINVAL;
		ipc.addr = from->sin_addr.s_addr;
		ufh.uh.source = from->sin_port;
		if (ipc.addr == 0)
			ipc.addr = sk->saddr;
		connected = 0;
	} else
#endif
	{
		ipc.addr = sk->saddr;
		ufh.uh.source = sk->sport;
	}

	ipc.opt = NULL;
	ipc.oif = sk->bound_dev_if;
	if (msg->msg_controllen) {
		err = ip_cmsg_send(msg, &ipc);
		if (err)
			return err;
		if (ipc.opt)
			free = 1;
		connected = 0;
	}
	if (!ipc.opt)
		ipc.opt = sk->opt;

	ufh.saddr = ipc.addr;
	ipc.addr = daddr = ufh.daddr;

	if (ipc.opt && ipc.opt->srr) {
		if (!daddr)
			return -EINVAL;
		daddr = ipc.opt->faddr;
		connected = 0;
	}
	tos = RT_TOS(sk->ip_tos);
	if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->is_strictroute)) {
		tos |= RTO_ONLINK;
		connected = 0;
	}

	if (MULTICAST(daddr)) {
		if (!ipc.oif)
			ipc.oif = sk->ip_mc_index;
		if (!ufh.saddr)
			ufh.saddr = sk->ip_mc_addr;
		connected = 0;
	}

	if (connected)
		rt = (struct rtable*)dst_clone(sk->dst_cache);

	if (rt == NULL) {
		err = ip_route_output(&rt, daddr, ufh.saddr,
#ifdef CONFIG_IP_TRANSPARENT_PROXY
				      (msg->msg_flags&MSG_PROXY ? RTO_TPROXY : 0) |
#endif
				      tos, ipc.oif);
		if (err)
			goto out;

		err = -EACCES;
		if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
			goto out;
	}

	ufh.saddr = rt->rt_src;
	if (!ipc.addr)
		ufh.daddr = ipc.addr = rt->rt_dst;
	ufh.uh.len = htons(ulen);
	ufh.uh.check = 0;
	ufh.iov = msg->msg_iov;
	ufh.wcheck = 0;

	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
	/* 4.1.3.4. It's configurable by the application via setsockopt() */
	/* (MAY) and it defaults to on (MUST). */

	err = ip_build_xmit(sk,sk->no_check ? udp_getfrag_nosum : udp_getfrag,
			    &ufh, ulen, &ipc, rt, msg->msg_flags);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err) {
		udp_statistics.UdpOutDatagrams++;
		return len;
	}
	return err;
}
/*
 *	IOCTL requests applicable to the UDP protocol
 */

int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch(cmd)
	{
		case TIOCOUTQ:
		{
			unsigned long amount;

			if (sk->state == TCP_LISTEN) return(-EINVAL);
			amount = sock_wspace(sk);
			return put_user(amount, (int *)arg);
		}

		case TIOCINQ:
		{
			struct sk_buff *skb;
			unsigned long amount;

			if (sk->state == TCP_LISTEN)
				return(-EINVAL);
			amount = 0;
			/* N.B. Is this interrupt safe??
			   -> Yes. Interrupts do not remove skbs. --ANK (980725)
			 */
			skb = skb_peek(&sk->receive_queue);
			if (skb != NULL) {
				/*
				 * We will only return the amount
				 * of this packet since that is all
				 * that will be read.
				 */
				amount = skb->len - sizeof(struct udphdr);
			}
			return put_user(amount, (int *)arg);
		}

		default:
			return(-ENOIOCTLCMD);
	}
	return(0);
}
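
/*
 * Illustrative userspace use of the ioctls handled above:
 *
 *	int n;
 *	ioctl(fd, TIOCINQ,  &n);	// payload bytes in the next queued datagram
 *	ioctl(fd, TIOCOUTQ, &n);	// free space in the send buffer (sock_wspace)
 */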
#ifndef HAVE_CSUM_COPY_USER
#undef CONFIG_UDP_DELAY_CSUM
#endif
/*
 *	This should be easy, if there is something there we
 *	return it, otherwise we block.
 */

int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
		int noblock, int flags, int *addr_len)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
	struct sk_buff *skb;
	int copied, err;

	/*
	 *	Check any passed addresses
	 */
	if (addr_len)
		*addr_len=sizeof(*sin);

	if (flags & MSG_ERRQUEUE)
		return ip_recv_error(sk, msg, len);

	/*
	 *	From here the generic datagram does a lot of the work. Come
	 *	the finished NET3, it will do _ALL_ the work!
	 */

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		goto out;

	copied = skb->len - sizeof(struct udphdr);
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

#ifndef CONFIG_UDP_DELAY_CSUM
	err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
				      copied);
#else
	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
					      copied);
	} else if (copied > msg->msg_iov[0].iov_len || (msg->msg_flags&MSG_TRUNC)) {
		if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum)))
			goto csum_copy_err;
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
					      copied);
	} else {
		unsigned int csum;

		err = 0;
		csum = csum_partial(skb->h.raw, sizeof(struct udphdr), skb->csum);
		csum = csum_and_copy_to_user((char*)&skb->h.uh[1], msg->msg_iov[0].iov_base,
					     copied, csum, &err);
		if (err)
			goto out_free;
		if ((unsigned short)csum_fold(csum))
			goto csum_copy_err;
	}
#endif
	if (err)
		goto out_free;
	sk->stamp=skb->stamp;

	/* Copy the address. */
	if (sin)
	{
		sin->sin_family = AF_INET;
		sin->sin_port = skb->h.uh->source;
		sin->sin_addr.s_addr = skb->nh.iph->saddr;
#ifdef CONFIG_IP_TRANSPARENT_PROXY
		if (flags&MSG_PROXY)
		{
			/*
			 * We map the first 8 bytes of a second sockaddr_in
			 * into the last 8 (unused) bytes of a sockaddr_in.
			 * This _is_ ugly, but it's the only way to do it
			 * easily, without adding system calls.
			 */
			struct sockaddr_in *sinto =
				(struct sockaddr_in *) sin->sin_zero;

			sinto->sin_family = AF_INET;
			sinto->sin_port = skb->h.uh->dest;
			sinto->sin_addr.s_addr = skb->nh.iph->daddr;
		}
#endif
	}
	if (sk->ip_cmsg_flags)
		ip_cmsg_recv(msg, skb);
	err = copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;

#ifdef CONFIG_UDP_DELAY_CSUM
csum_copy_err:
	udp_statistics.UdpInErrors++;
	skb_free_datagram(sk, skb);

	/*
	 * Error for blocking case is chosen to masquerade
	 * as some normal condition.
	 */
	return (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
#endif
}
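
/*
 * Illustrative: the MSG_ERRQUEUE branch above pairs with the IP_RECVERR
 * socket option, which sets sk->ip_recverr so udp_err() queues ICMP
 * errors for the application.  A userspace sketch for draining them:
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_IP, IP_RECVERR, &on, sizeof(on));
 *	...
 *	recvmsg(fd, &msg, MSG_ERRQUEUE);  // returns a sock_extended_err cmsg
 */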
int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct rtable *rt;
	int err;

	if (addr_len < sizeof(*usin))
		return(-EINVAL);

	/*
	 *	1003.1g - break association.
	 */

	if (usin->sin_family==AF_UNSPEC)
	{
		sk->saddr=INADDR_ANY;
		sk->rcv_saddr=INADDR_ANY;
		sk->daddr=INADDR_ANY;
		sk->state = TCP_CLOSE;
		return 0;
	}

	if (usin->sin_family && usin->sin_family != AF_INET)
		return(-EAFNOSUPPORT);

	dst_release(xchg(&sk->dst_cache, NULL));

	err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
			       sk->ip_tos|sk->localroute, sk->bound_dev_if);
	if (err)
		return err;
	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
		ip_rt_put(rt);
		return -EACCES;
	}
	if(!sk->saddr)
		sk->saddr = rt->rt_src;		/* Update source address */
	if(!sk->rcv_saddr)
		sk->rcv_saddr = rt->rt_src;
	sk->daddr = rt->rt_dst;
	sk->dport = usin->sin_port;
	sk->state = TCP_ESTABLISHED;

	sk->dst_cache = &rt->u.dst;
	return(0);
}
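
/*
 * Illustrative: "disconnecting" a UDP socket as handled by the
 * AF_UNSPEC branch above (userspace sketch):
 *
 *	struct sockaddr sa;
 *	memset(&sa, 0, sizeof(sa));
 *	sa.sa_family = AF_UNSPEC;
 *	connect(fd, &sa, sizeof(sa));	// clears the peer, socket back to TCP_CLOSE
 */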
static void udp_close(struct sock *sk, long timeout)
{
	bh_lock_sock(sk);

	/* See for explanation: raw_close in ipv4/raw.c */
	sk->state = TCP_CLOSE;
	udp_v4_unhash(sk);
	sk->dead = 1;
	destroy_sock(sk);
}
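
/*
 * Queue a received datagram on the socket.  With delayed checksumming
 * the packet is normally not verified until recvmsg(), but if a socket
 * filter is attached the checksum is verified here first, so the filter
 * never runs over data that would later be dropped as corrupt.
 */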
static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	/*
	 *	Charge it to the socket, dropping if the queue is full.
	 */

#if defined(CONFIG_FILTER) && defined(CONFIG_UDP_DELAY_CSUM)
	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
		if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) {
			udp_statistics.UdpInErrors++;
			ip_statistics.IpInDiscards++;
			ip_statistics.IpInDelivers--;
			kfree_skb(skb);
			return -1;
		}
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
#endif

	if (sock_queue_rcv_skb(sk,skb)<0) {
		udp_statistics.UdpInErrors++;
		ip_statistics.IpInDiscards++;
		ip_statistics.IpInDelivers--;
		kfree_skb(skb);
		return -1;
	}
	udp_statistics.UdpInDatagrams++;
	return 0;
}
static inline void udp_deliver(struct sock *sk, struct sk_buff *skb)
{
	udp_queue_rcv_skb(sk, skb);
}
/*
 *	Multicasts and broadcasts go to each listener.
 *
 *	Note: called only from the BH handler context,
 *	so we don't need to lock the hashes.
 */
static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
				u32 saddr, u32 daddr)
{
	struct sock *sk;
	int dif;

	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
	dif = skb->dev->ifindex;
	sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif);
	if (sk) {
		struct sock *sknext = NULL;

		do {
			struct sk_buff *skb1 = skb;

			sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
						   uh->source, daddr, dif);
			if(sknext)
				skb1 = skb_clone(skb, GFP_ATOMIC);

			if(skb1)
				udp_deliver(sk, skb1);
			sk = sknext;
		} while(sknext);
	} else
		kfree_skb(skb);
	return 0;
}
#ifdef CONFIG_IP_TRANSPARENT_PROXY
/*
 *	Check whether a received UDP packet might be for one of our
 *	sockets.
 */

int udp_chkaddr(struct sk_buff *skb)
{
	struct iphdr *iph = skb->nh.iph;
	struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4);
	struct sock *sk;

	sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest, skb->dev->ifindex);
	if (!sk)
		return 0;

	/* 0 means accept all LOCAL addresses here, not all the world... */
	if (sk->rcv_saddr == 0)
		return 0;

	return 1;
}
#endif
/*
 *	All we need to do is get the socket, and then do a checksum.
 */

int udp_rcv(struct sk_buff *skb, unsigned short len)
{
	struct sock *sk;
	struct udphdr *uh;
	unsigned short ulen;
	struct rtable *rt = (struct rtable*)skb->dst;
	u32 saddr = skb->nh.iph->saddr;
	u32 daddr = skb->nh.iph->daddr;

	/*
	 *	First time through the loop.. Do all the setup stuff
	 *	(including finding out the socket we go to etc)
	 */

	/*
	 *	Get the header.
	 */

	uh = skb->h.uh;
	__skb_pull(skb, skb->h.raw - skb->data);

	ip_statistics.IpInDelivers++;

	/*
	 *	Validate the packet and the UDP length.
	 */

	ulen = ntohs(uh->len);

	if (ulen > len || ulen < sizeof(*uh)) {
		NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
		udp_statistics.UdpInErrors++;
		kfree_skb(skb);
		return(0);
	}
	skb_trim(skb, ulen);

#ifndef CONFIG_UDP_DELAY_CSUM
	if (uh->check &&
	    (((skb->ip_summed==CHECKSUM_HW)&&udp_check(uh,ulen,saddr,daddr,skb->csum)) ||
	     ((skb->ip_summed==CHECKSUM_NONE) &&
	      (udp_check(uh,ulen,saddr,daddr, csum_partial((char*)uh, ulen, 0))))))
		goto csum_error;
#else
	if (uh->check==0)
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	else if (skb->ip_summed==CHECKSUM_HW) {
		if (udp_check(uh,ulen,saddr,daddr,skb->csum))
			goto csum_error;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
#endif

	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);

#ifdef CONFIG_IP_TRANSPARENT_PROXY
	if (IPCB(skb)->redirport)
		sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source,
					 daddr, skb->dev, IPCB(skb)->redirport,
					 skb->dev->ifindex);
	else
#endif
	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);

	if (sk == NULL) {
#ifdef CONFIG_UDP_DELAY_CSUM
		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
		    (unsigned short)csum_fold(csum_partial((char*)uh, ulen, skb->csum)))
			goto csum_error;
#endif
		udp_statistics.UdpNoPorts++;
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

		/*
		 * Hmm.  We got a UDP broadcast to a port to which we
		 * don't wanna listen.  Ignore it.
		 */
		kfree_skb(skb);
		return(0);
	}
	udp_deliver(sk, skb);
	return 0;

csum_error:
	/*
	 * RFC1122: OK.  Discards the bad packet silently (as far as
	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
	 */
	NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
			NIPQUAD(saddr),
			ntohs(uh->source),
			NIPQUAD(daddr),
			ntohs(uh->dest),
			ulen));
	udp_statistics.UdpInErrors++;
	kfree_skb(skb);
	return(0);
}
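
/*
 * UDP's entry in the inet protocol switch.  The initialiser is
 * positional; the field each value fills is noted alongside it, and
 * operations UDP does not implement are left NULL.
 */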
struct proto udp_prot = {
	(struct sock *)&udp_prot,	/* sklist_next */
	(struct sock *)&udp_prot,	/* sklist_prev */
	udp_close,			/* close */
	udp_connect,			/* connect */
	NULL,				/* accept */
	NULL,				/* retransmit */
	NULL,				/* write_wakeup */
	NULL,				/* read_wakeup */
	datagram_poll,			/* poll */
	udp_ioctl,			/* ioctl */
	NULL,				/* init */
	NULL,				/* destroy */
	NULL,				/* shutdown */
	ip_setsockopt,			/* setsockopt */
	ip_getsockopt,			/* getsockopt */
	udp_sendmsg,			/* sendmsg */
	udp_recvmsg,			/* recvmsg */
	NULL,				/* bind */
	udp_queue_rcv_skb,		/* backlog_rcv */
	udp_v4_hash,			/* hash */
	udp_v4_unhash,			/* unhash */
	udp_v4_rehash,			/* rehash */
	udp_good_socknum,		/* good_socknum */
	udp_v4_verify_bind,		/* verify_bind */
	128,				/* max_header */
	0,				/* retransmits */
	"UDP",				/* name */
	0,				/* inuse */
	0				/* highestinuse */
};