Import 2.4.0-test3pre4
[davej-history.git] / net / ipv4 / udp.c
bloba14c984d7b50de55454199bc2cc108f8ff50548b
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * The User Datagram Protocol (UDP).
8 * Version: $Id: udp.c,v 1.83 2000/06/09 07:35:49 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
15 * Fixes:
16 * Alan Cox : verify_area() calls
17 * Alan Cox : stopped close while in use off icmp
18 * messages. Not a fix but a botch that
19 * for udp at least is 'valid'.
20 * Alan Cox : Fixed icmp handling properly
21 * Alan Cox : Correct error for oversized datagrams
22 * Alan Cox : Tidied select() semantics.
23 * Alan Cox : udp_err() fixed properly, also now
24 * select and read wake correctly on errors
25 * Alan Cox : udp_send verify_area moved to avoid mem leak
26 * Alan Cox : UDP can count its memory
27 * Alan Cox : send to an unknown connection causes
28 * an ECONNREFUSED off the icmp, but
29 * does NOT close.
30 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
31 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
32 * bug no longer crashes it.
33 * Fred Van Kempen : Net2e support for sk->broadcast.
34 * Alan Cox : Uses skb_free_datagram
35 * Alan Cox : Added get/set sockopt support.
36 * Alan Cox : Broadcasting without option set returns EACCES.
37 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
38 * Alan Cox : Use ip_tos and ip_ttl
39 * Alan Cox : SNMP Mibs
40 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
41 * Matt Dillon : UDP length checks.
42 * Alan Cox : Smarter af_inet used properly.
43 * Alan Cox : Use new kernel side addressing.
44 * Alan Cox : Incorrect return on truncated datagram receive.
45 * Arnt Gulbrandsen : New udp_send and stuff
46 * Alan Cox : Cache last socket
47 * Alan Cox : Route cache
48 * Jon Peatfield : Minor efficiency fix to sendto().
49 * Mike Shaver : RFC1122 checks.
50 * Alan Cox : Nonblocking error fix.
51 * Willy Konynenberg : Transparent proxying support.
52 * Mike McLagan : Routing by source
53 * David S. Miller : New socket lookup architecture.
54 * Last socket cache retained as it
55 * does have a high hit rate.
56 * Olaf Kirch : Don't linearise iovec on sendmsg.
57 * Andi Kleen : Some cleanups, cache destination entry
58 * for connect.
59 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
60 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
61 * return ENOTCONN for unconnected sockets (POSIX)
62 * Janos Farkas : don't deliver multi/broadcasts to a different
63 * bound-to-device socket
66 * This program is free software; you can redistribute it and/or
67 * modify it under the terms of the GNU General Public License
68 * as published by the Free Software Foundation; either version
69 * 2 of the License, or (at your option) any later version.
72 /* RFC1122 Status:
73 4.1.3.1 (Ports):
74 SHOULD send ICMP_PORT_UNREACHABLE in response to datagrams to
75 an un-listened port. (OK)
76 4.1.3.2 (IP Options)
77 MUST pass IP options from IP -> application (OK)
78 MUST allow application to specify IP options (OK)
79 4.1.3.3 (ICMP Messages)
80 MUST pass ICMP error messages to application (OK -- except when SO_BSDCOMPAT is set)
81 4.1.3.4 (UDP Checksums)
82 MUST provide facility for checksumming (OK)
83 MAY allow application to control checksumming (OK)
84 MUST default to checksumming on (OK)
85 MUST discard silently datagrams with bad csums (OK, except during debugging)
86 4.1.3.5 (UDP Multihoming)
87 MUST allow application to specify source address (OK)
88 SHOULD be able to communicate the chosen src addr up to application
89 when application doesn't choose (DOES - use recvmsg cmsgs)
90 4.1.3.6 (Invalid Addresses)
91 MUST discard invalid source addresses (OK -- done in the new routing code)
92 MUST only send datagrams with one of our addresses (OK)
95 #include <asm/system.h>
96 #include <asm/uaccess.h>
97 #include <linux/types.h>
98 #include <linux/fcntl.h>
99 #include <linux/socket.h>
100 #include <linux/sockios.h>
101 #include <linux/in.h>
102 #include <linux/errno.h>
103 #include <linux/timer.h>
104 #include <linux/mm.h>
105 #include <linux/config.h>
106 #include <linux/inet.h>
107 #include <linux/netdevice.h>
108 #include <net/snmp.h>
109 #include <net/ip.h>
110 #include <net/protocol.h>
111 #include <linux/skbuff.h>
112 #include <net/sock.h>
113 #include <net/udp.h>
114 #include <net/icmp.h>
115 #include <net/route.h>
116 #include <net/inet_common.h>
117 #include <net/checksum.h>
120 * Snmp MIB for the UDP layer
123 struct udp_mib udp_statistics[NR_CPUS*2];
125 struct sock *udp_hash[UDP_HTABLE_SIZE];
126 rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
128 /* Shared by v4/v6 udp. */
129 int udp_port_rover = 0;
/*
 * udp_v4_get_port - bind sk to local UDP port snum; auto-select a port
 * when snum == 0.
 *
 * Auto-select: starting from udp_port_rover, scan UDP_HTABLE_SIZE
 * candidate ports and remember the one whose hash chain is shortest;
 * an empty chain wins immediately (goto gotit).  The chosen slot is
 * then probed forward in steps of UDP_HTABLE_SIZE (same chain) with
 * udp_lport_inuse() until an actually free port number is found.
 * Results are wrapped back into [sysctl_local_port_range[0],[1]].
 *
 * Explicit bind: walk the chain for snum and fail on any other socket
 * with the same port and bound device whose receive address overlaps,
 * unless both sockets have SO_REUSEADDR set.
 *
 * Returns 0 on success (sk->num set), 1 on conflict.  The whole
 * operation runs under write_lock_bh(&udp_hash_lock).
 *
 * NOTE(review): this extraction dropped brace/blank lines; code tokens
 * below are kept byte-identical to the original.
 */
131 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
133 write_lock_bh(&udp_hash_lock);
134 if (snum == 0) {
/* --- automatic port selection --- */
135 int best_size_so_far, best, result, i;
/* Reset the rover if it has wandered outside the configured range. */
137 if (udp_port_rover > sysctl_local_port_range[1] ||
138 udp_port_rover < sysctl_local_port_range[0])
139 udp_port_rover = sysctl_local_port_range[0];
140 best_size_so_far = 32767;
141 best = result = udp_port_rover;
142 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
143 struct sock *sk;
144 int size;
146 sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
147 if (!sk) {
/* Empty chain: clamp result back into the port range and take it. */
148 if (result > sysctl_local_port_range[1])
149 result = sysctl_local_port_range[0] +
150 ((result - sysctl_local_port_range[0]) &
151 (UDP_HTABLE_SIZE - 1));
152 goto gotit;
/* Count chain length, bailing out as soon as it ties the best seen. */
154 size = 0;
155 do {
156 if (++size >= best_size_so_far)
157 goto next;
158 } while ((sk = sk->next) != NULL);
159 best_size_so_far = size;
160 best = result;
161 next:
/* Probe the shortest chain's slot for a truly unused port number. */
163 result = best;
164 for(;; result += UDP_HTABLE_SIZE) {
165 if (result > sysctl_local_port_range[1])
166 result = sysctl_local_port_range[0]
167 + ((result - sysctl_local_port_range[0]) &
168 (UDP_HTABLE_SIZE - 1));
169 if (!udp_lport_inuse(result))
170 break;
172 gotit:
173 udp_port_rover = snum = result;
174 } else {
/* --- explicit bind: look for an address/port conflict --- */
175 struct sock *sk2;
177 for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
178 sk2 != NULL;
179 sk2 = sk2->next) {
180 if (sk2->num == snum &&
181 sk2 != sk &&
182 sk2->bound_dev_if == sk->bound_dev_if &&
183 (!sk2->rcv_saddr ||
184 !sk->rcv_saddr ||
185 sk2->rcv_saddr == sk->rcv_saddr) &&
186 (!sk2->reuse || !sk->reuse))
187 goto fail;
190 sk->num = snum;
191 write_unlock_bh(&udp_hash_lock);
192 return 0;
194 fail:
195 write_unlock_bh(&udp_hash_lock);
196 return 1;
/*
 * udp_v4_hash - insert sk at the head of its udp_hash chain (keyed by
 * sk->num), bump the protocol use count, and take a socket reference.
 * Runs under write_lock_bh(&udp_hash_lock).
 */
199 static void udp_v4_hash(struct sock *sk)
201 struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
203 write_lock_bh(&udp_hash_lock);
/* Head insertion with back-pointer (pprev) maintenance. */
204 if ((sk->next = *skp) != NULL)
205 (*skp)->pprev = &sk->next;
206 *skp = sk;
207 sk->pprev = skp;
208 sock_prot_inc_use(sk->prot);
209 sock_hold(sk);
210 write_unlock_bh(&udp_hash_lock);
/*
 * udp_v4_unhash - remove sk from its udp_hash chain if it is hashed
 * (sk->pprev non-NULL), dropping the use count and the reference taken
 * by udp_v4_hash().  Safe to call on an already-unhashed socket.
 */
213 static void udp_v4_unhash(struct sock *sk)
215 write_lock_bh(&udp_hash_lock);
216 if (sk->pprev) {
217 if (sk->next)
218 sk->next->pprev = sk->pprev;
219 *sk->pprev = sk->next;
220 sk->pprev = NULL;
221 sock_prot_dec_use(sk->prot);
222 __sock_put(sk);
224 write_unlock_bh(&udp_hash_lock);
227 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
228 * harder than this. -DaveM
/*
 * udp_v4_lookup_longway - find the best-matching UDP socket for a
 * packet (saddr:sport -> daddr:dport on interface dif).
 *
 * Walks the hash chain for the destination port and scores each
 * candidate: one point for each of rcv_saddr, daddr, dport and
 * bound_dev_if that is set AND matches (a set field that does not
 * match disqualifies the socket).  A perfect score of 4 wins
 * immediately; otherwise the highest-scoring (most specific) socket
 * is returned, or NULL.  Caller must hold udp_hash_lock.
 */
230 struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
232 struct sock *sk, *result = NULL;
233 unsigned short hnum = ntohs(dport);
234 int badness = -1;
236 for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
237 if(sk->num == hnum) {
238 int score = 0;
239 if(sk->rcv_saddr) {
240 if(sk->rcv_saddr != daddr)
241 continue;
242 score++;
244 if(sk->daddr) {
245 if(sk->daddr != saddr)
246 continue;
247 score++;
249 if(sk->dport) {
250 if(sk->dport != sport)
251 continue;
252 score++;
254 if(sk->bound_dev_if) {
255 if(sk->bound_dev_if != dif)
256 continue;
257 score++;
/* Fully-specified 4-tuple match: cannot be beaten, stop here. */
259 if(score == 4) {
260 result = sk;
261 break;
262 } else if(score > badness) {
263 result = sk;
264 badness = score;
268 return result;
/*
 * udp_v4_lookup - locking wrapper around udp_v4_lookup_longway().
 * Takes udp_hash_lock for reading and returns the matched socket with
 * a reference held (caller must sock_put()), or NULL.
 */
271 __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
273 struct sock *sk;
275 read_lock(&udp_hash_lock);
276 sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
277 if (sk)
278 sock_hold(sk);
279 read_unlock(&udp_hash_lock);
280 return sk;
/*
 * udp_v4_mcast_next - starting at sk, advance along the hash chain to
 * the next socket eligible to receive a multicast/broadcast datagram:
 * local port must equal num, and any of daddr/dport/rcv_saddr/
 * bound_dev_if that the socket has set must match the packet's
 * raddr/rnum/laddr/dif.  Returns the socket found or NULL at chain end.
 * Caller holds udp_hash_lock.
 */
283 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
284 unsigned short num,
285 unsigned long raddr,
286 unsigned short rnum,
287 unsigned long laddr,
288 int dif)
290 struct sock *s = sk;
291 unsigned short hnum = ntohs(num);
292 for(; s; s = s->next) {
293 if ((s->num != hnum) ||
294 (s->daddr && s->daddr!=raddr) ||
295 (s->dport != rnum && s->dport != 0) ||
296 (s->rcv_saddr && s->rcv_saddr != laddr) ||
297 (s->bound_dev_if && s->bound_dev_if != dif))
298 continue;
299 break;
301 return s;
305 * This routine is called by the ICMP module when it gets some
306 * sort of error condition. If err < 0 then the socket should
307 * be closed and the error returned to the user. If err > 0
308 * it's just the icmp type << 8 | icmp code.
309 * Header points to the ip header of the error packet. We move
310 * on past this. Then (as it used to claim before adjustment)
311 * header points to the first 8 bytes of the udp header. We need
312 * to find the appropriate port.
/*
 * udp_err - handle an ICMP error that quotes one of our UDP packets.
 *
 * dp points at the quoted IP header inside the ICMP payload; the UDP
 * header follows at dp + (ihl << 2).  The originating socket is found
 * by looking up the QUOTED packet's (daddr, dest) as our local side
 * and (saddr, source) as the remote side.  The ICMP type/code is
 * translated into an errno (err), a "hard error" flag, and optional
 * info (PMTU value or parameter-problem pointer).  Per RFC 1122
 * 4.1.3.3 the error is reported to the application: queued via
 * ip_icmp_error() when IP_RECVERR is on, otherwise only delivered for
 * hard errors on connected sockets.
 */
315 void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
317 struct iphdr *iph = (struct iphdr*)dp;
318 struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
319 int type = skb->h.icmph->type;
320 int code = skb->h.icmph->code;
321 struct sock *sk;
322 int harderr;
323 u32 info;
324 int err;
/* The ICMP payload must at least cover the quoted IP + UDP headers. */
326 if (len < (iph->ihl<<2)+sizeof(struct udphdr)) {
327 ICMP_INC_STATS_BH(IcmpInErrors);
328 return;
331 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
332 if (sk == NULL) {
333 ICMP_INC_STATS_BH(IcmpInErrors);
334 return; /* No socket for error */
337 err = 0;
338 info = 0;
339 harderr = 0;
/* Map ICMP type/code to errno + hard/soft classification. */
341 switch (type) {
342 default:
343 case ICMP_TIME_EXCEEDED:
344 err = EHOSTUNREACH;
345 break;
346 case ICMP_SOURCE_QUENCH:
347 goto out;
348 case ICMP_PARAMETERPROB:
349 err = EPROTO;
350 info = ntohl(skb->h.icmph->un.gateway)>>24;
351 harderr = 1;
352 break;
353 case ICMP_DEST_UNREACH:
354 if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
355 if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
356 err = EMSGSIZE;
357 info = ntohs(skb->h.icmph->un.frag.mtu);
358 harderr = 1;
359 break;
361 goto out;
363 err = EHOSTUNREACH;
364 if (code <= NR_ICMP_UNREACH) {
365 harderr = icmp_err_convert[code].fatal;
366 err = icmp_err_convert[code].errno;
368 break;
372 * RFC1122: OK. Passes ICMP errors back to application, as per
373 * 4.1.3.3.
375 if (!sk->protinfo.af_inet.recverr) {
/* Without IP_RECVERR, soft errors and unconnected sockets are muted. */
376 if (!harderr || sk->state != TCP_ESTABLISHED)
377 goto out;
378 } else {
379 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
381 sk->err = err;
382 sk->error_report(sk);
383 out:
384 sock_put(sk);
/*
 * udp_check - fold the UDP pseudo-header (saddr, daddr, len, proto)
 * into the partial checksum 'base'; result 0 means the checksum is valid.
 */
388 static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
390 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
/*
 * udpfakehdr - scratch state passed to the udp_getfrag*() callbacks by
 * udp_sendmsg(): the UDP header being built, pseudo-header addresses,
 * the user iovec still to copy, and the running checksum (wcheck).
 */
393 struct udpfakehdr
395 struct udphdr uh;
396 u32 saddr;
397 u32 daddr;
398 struct iovec *iov;
399 u32 wcheck;
403 * Copy and checksum a UDP packet from user space into a buffer. We still have
404 * to do the planning to get ip_build_xmit to spot direct transfer to network
405 * card and provide an additional callback mode for direct user->board I/O
406 * transfers. That one will be fun.
/*
 * udp_getfrag - ip_build_xmit() callback: copy one fragment of user
 * data into the output buffer while accumulating the UDP checksum.
 *
 * For offset == 0 (the fragment carrying the UDP header) the user data
 * is checksum-copied after the header slot, the header itself is folded
 * in, the final checksum is computed over the pseudo-header, and the
 * completed header is written in front.  A computed checksum of 0 is
 * transmitted as all-ones, since 0 on the wire means "no checksum".
 * Later fragments are plain checksum-copies at offset minus the header
 * size.  Returns 0 or -EFAULT on a bad user pointer.
 */
409 static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
411 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
412 if (offset==0) {
413 if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
414 fraglen-sizeof(struct udphdr), &ufh->wcheck))
415 return -EFAULT;
416 ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
417 ufh->wcheck);
418 ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
419 ntohs(ufh->uh.len),
420 IPPROTO_UDP, ufh->wcheck);
/* 0 would mean "no checksum" on the wire; send all-ones instead. */
421 if (ufh->uh.check == 0)
422 ufh->uh.check = -1;
423 memcpy(to, ufh, sizeof(struct udphdr));
424 return 0;
426 if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
427 fraglen, &ufh->wcheck))
428 return -EFAULT;
429 return 0;
433 * Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
434 * that we use two routines for this for speed. Probably we ought to have a
435 * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
436 * Timing needed to verify if this is a valid decision.
/*
 * udp_getfrag_nosum - like udp_getfrag() but without checksumming,
 * used when the socket disabled UDP checksums (sk->no_check).  The
 * first fragment gets the (zero-checksum) header prepended; all
 * fragments are plain memcpy_fromiovecend() copies of user data.
 */
439 static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
441 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
443 if (offset==0) {
444 memcpy(to, ufh, sizeof(struct udphdr));
445 return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
446 fraglen-sizeof(struct udphdr));
448 return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
449 fraglen);
/*
 * udp_sendmsg - send one UDP datagram of 'len' user bytes.
 *
 * Resolves the destination (explicit msg_name, or the connected peer),
 * merges control-message options (ip_cmsg_send), picks source address,
 * TOS and output interface, routes the packet (reusing the cached
 * per-socket route for connected sockets with no overriding options),
 * then hands the data to ip_build_xmit() with a checksumming or
 * non-checksumming copy callback depending on sk->no_check.
 * Returns len on success or a negative errno.  'connected' is cleared
 * whenever any option invalidates the cached fast path.
 */
452 int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
454 int ulen = len + sizeof(struct udphdr);
455 struct ipcm_cookie ipc;
456 struct udpfakehdr ufh;
457 struct rtable *rt = NULL;
458 int free = 0;
459 int connected = 0;
460 u32 daddr;
461 u8 tos;
462 int err;
464 /* This check is ONLY to check for arithmetic overflow
465 on integer(!) len. Not more! Real check will be made
466 in ip_build_xmit --ANK
468 BTW socket.c -> af_*.c -> ... make multiple
469 invalid conversions size_t -> int. We MUST repair it f.e.
470 by replacing all of them with size_t and revise all
471 the places sort of len += sizeof(struct iphdr)
472 If len was ULONG_MAX-10 it would be cathastrophe --ANK
475 if (len < 0 || len > 0xFFFF)
476 return -EMSGSIZE;
479 * Check the flags.
482 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
483 return -EOPNOTSUPP;
486 * Get and verify the address.
489 if (msg->msg_name) {
/* sendto() path: destination supplied with the message. */
490 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
491 if (msg->msg_namelen < sizeof(*usin))
492 return -EINVAL;
493 if (usin->sin_family != AF_INET) {
494 if (usin->sin_family != AF_UNSPEC)
495 return -EINVAL;
496 if (net_ratelimit())
497 printk("Remind Kuznetsov, he has to repair %s eventually\n", current->comm);
500 ufh.daddr = usin->sin_addr.s_addr;
501 ufh.uh.dest = usin->sin_port;
502 if (ufh.uh.dest == 0)
503 return -EINVAL;
504 } else {
/* send() path: socket must be connected (POSIX ENOTCONN). */
505 if (sk->state != TCP_ESTABLISHED)
506 return -ENOTCONN;
507 ufh.daddr = sk->daddr;
508 ufh.uh.dest = sk->dport;
509 /* Open fast path for connected socket.
510 Route will not be used, if at least one option is set.
512 connected = 1;
514 ipc.addr = sk->saddr;
515 ufh.uh.source = sk->sport;
517 ipc.opt = NULL;
518 ipc.oif = sk->bound_dev_if;
519 if (msg->msg_controllen) {
520 err = ip_cmsg_send(msg, &ipc);
521 if (err)
522 return err;
523 if (ipc.opt)
524 free = 1;
525 connected = 0;
527 if (!ipc.opt)
528 ipc.opt = sk->protinfo.af_inet.opt;
530 ufh.saddr = ipc.addr;
531 ipc.addr = daddr = ufh.daddr;
/* Source routing: route to the first hop, not the final address. */
533 if (ipc.opt && ipc.opt->srr) {
534 if (!daddr)
535 return -EINVAL;
536 daddr = ipc.opt->faddr;
537 connected = 0;
539 tos = RT_TOS(sk->protinfo.af_inet.tos);
540 if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
541 (ipc.opt && ipc.opt->is_strictroute)) {
542 tos |= RTO_ONLINK;
543 connected = 0;
546 if (MULTICAST(daddr)) {
/* Multicast defaults from IP_MULTICAST_IF settings. */
547 if (!ipc.oif)
548 ipc.oif = sk->protinfo.af_inet.mc_index;
549 if (!ufh.saddr)
550 ufh.saddr = sk->protinfo.af_inet.mc_addr;
551 connected = 0;
/* Fast path: reuse the socket's cached route if still valid. */
554 if (connected)
555 rt = (struct rtable*)sk_dst_check(sk, 0);
557 if (rt == NULL) {
558 err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
559 if (err)
560 goto out;
562 err = -EACCES;
563 if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
564 goto out;
565 if (connected)
566 sk_dst_set(sk, dst_clone(&rt->u.dst));
569 if (msg->msg_flags&MSG_CONFIRM)
570 goto do_confirm;
571 back_from_confirm:
573 ufh.saddr = rt->rt_src;
574 if (!ipc.addr)
575 ufh.daddr = ipc.addr = rt->rt_dst;
576 ufh.uh.len = htons(ulen);
577 ufh.uh.check = 0;
578 ufh.iov = msg->msg_iov;
579 ufh.wcheck = 0;
581 /* RFC1122: OK. Provides the checksumming facility (MUST) as per */
582 /* 4.1.3.4. It's configurable by the application via setsockopt() */
583 /* (MAY) and it defaults to on (MUST). */
585 err = ip_build_xmit(sk,
586 (sk->no_check == UDP_CSUM_NOXMIT ?
587 udp_getfrag_nosum :
588 udp_getfrag),
589 &ufh, ulen, &ipc, rt, msg->msg_flags);
591 out:
592 ip_rt_put(rt);
593 if (free)
594 kfree(ipc.opt);
595 if (!err) {
596 UDP_INC_STATS_USER(UdpOutDatagrams);
597 return len;
599 return err;
601 do_confirm:
/* MSG_CONFIRM: refresh neighbour reachability; MSG_PROBE with no
data means "confirm only", so skip the actual transmit. */
602 dst_confirm(&rt->u.dst);
603 if (!(msg->msg_flags&MSG_PROBE) || len)
604 goto back_from_confirm;
605 err = 0;
606 goto out;
610 * IOCTL requests applicable to the UDP protocol
/*
 * udp_ioctl - UDP-specific ioctls: SIOCOUTQ reports unsent write-queue
 * bytes; SIOCINQ reports the payload size of the NEXT queued datagram
 * only (not the whole queue), since a single read consumes one datagram.
 * Unknown commands return -ENOIOCTLCMD for the caller to handle.
 */
613 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
615 switch(cmd)
617 case SIOCOUTQ:
619 int amount = atomic_read(&sk->wmem_alloc);
620 return put_user(amount, (int *)arg);
623 case SIOCINQ:
625 struct sk_buff *skb;
626 unsigned long amount;
628 amount = 0;
629 spin_lock_irq(&sk->receive_queue.lock);
630 skb = skb_peek(&sk->receive_queue);
631 if (skb != NULL) {
633 * We will only return the amount
634 * of this packet since that is all
635 * that will be read.
637 amount = skb->len - sizeof(struct udphdr);
639 spin_unlock_irq(&sk->receive_queue.lock);
640 return put_user(amount, (int *)arg);
643 default:
644 return -ENOIOCTLCMD;
646 return(0);
/*
 * __udp_checksum_complete - checksum the remaining packet body against
 * the pseudo-header sum already folded into skb->csum; non-zero result
 * means the checksum is bad.
 */
649 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
651 return (unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum));
/*
 * udp_checksum_complete - verify the UDP checksum unless it was already
 * validated (CHECKSUM_UNNECESSARY).  Non-zero means bad checksum.
 */
654 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
656 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
657 __udp_checksum_complete(skb);
661 * This should be easy, if there is something there we
662 * return it, otherwise we block.
/*
 * udp_recvmsg - receive one UDP datagram into the user's iovec.
 *
 * Dequeues a datagram (blocking unless noblock/MSG_DONTWAIT), copies
 * up to 'len' payload bytes and sets MSG_TRUNC on overflow.  The copy
 * verifies the checksum lazily: already-validated packets are plain-
 * copied; otherwise the copy itself accumulates the checksum
 * (copy_and_csum_toiovec), except in the truncated case where the
 * checksum must be completed over the full packet first.  On checksum
 * failure the bad datagram is unlinked/freed and the call fails with
 * -EAGAIN (non-blocking) or -EHOSTUNREACH.  Fills in the sender's
 * address and any requested IP cmsgs.  Returns bytes copied or -errno.
 */
665 int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
666 int noblock, int flags, int *addr_len)
668 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
669 struct sk_buff *skb;
670 int copied, err;
673 * Check any passed addresses
675 if (addr_len)
676 *addr_len=sizeof(*sin);
678 if (flags & MSG_ERRQUEUE)
679 return ip_recv_error(sk, msg, len);
682 * From here the generic datagram does a lot of the work. Come
683 * the finished NET3, it will do _ALL_ the work!
686 skb = skb_recv_datagram(sk, flags, noblock, &err);
687 if (!skb)
688 goto out;
690 copied = skb->len - sizeof(struct udphdr);
691 if (copied > len) {
692 copied = len;
693 msg->msg_flags |= MSG_TRUNC;
/* Three copy strategies depending on checksum state / truncation. */
696 if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
697 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
698 copied);
699 } else if (msg->msg_flags&MSG_TRUNC) {
/* Truncated: must checksum the whole packet before copying part. */
700 if (__udp_checksum_complete(skb))
701 goto csum_copy_err;
702 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
703 copied);
704 } else {
/* Full copy: checksum while copying, one pass over the data. */
705 err = copy_and_csum_toiovec(msg->msg_iov, skb, sizeof(struct udphdr));
707 if (err)
708 goto csum_copy_err;
711 if (err)
712 goto out_free;
713 sk->stamp=skb->stamp;
715 /* Copy the address. */
716 if (sin)
718 sin->sin_family = AF_INET;
719 sin->sin_port = skb->h.uh->source;
720 sin->sin_addr.s_addr = skb->nh.iph->saddr;
721 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
723 if (sk->protinfo.af_inet.cmsg_flags)
724 ip_cmsg_recv(msg, skb);
725 err = copied;
727 out_free:
728 skb_free_datagram(sk, skb);
729 out:
730 return err;
732 csum_copy_err:
733 UDP_INC_STATS_BH(UdpInErrors);
735 /* Clear queue. */
736 if (flags&MSG_PEEK) {
/* Peeked skb is still queued: unlink it ourselves if untouched. */
737 int clear = 0;
738 spin_lock_irq(&sk->receive_queue.lock);
739 if (skb == skb_peek(&sk->receive_queue)) {
740 __skb_unlink(skb, &sk->receive_queue);
741 clear = 1;
743 spin_unlock_irq(&sk->receive_queue.lock);
744 if (clear)
745 kfree_skb(skb);
748 skb_free_datagram(sk, skb);
751 * Error for blocking case is chosen to masquerade
752 * as some normal condition.
754 return (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
/*
 * udp_connect - associate the socket with a remote address.
 *
 * Validates the sockaddr, routes to the destination (rejecting
 * broadcast routes unless SO_BROADCAST is set), fills in any unset
 * source/receive addresses from the route, records the peer, marks
 * the socket TCP_ESTABLISHED (UDP's "connected" state), and caches
 * the route for the sendmsg fast path.  Returns 0 or -errno.
 */
757 int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
759 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
760 struct rtable *rt;
761 int err;
764 if (addr_len < sizeof(*usin))
765 return -EINVAL;
767 if (usin->sin_family != AF_INET)
768 return -EAFNOSUPPORT;
770 sk_dst_reset(sk);
772 err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
773 sk->protinfo.af_inet.tos|sk->localroute, sk->bound_dev_if);
774 if (err)
775 return err;
776 if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
777 ip_rt_put(rt);
778 return -EACCES;
780 if(!sk->saddr)
781 sk->saddr = rt->rt_src; /* Update source address */
782 if(!sk->rcv_saddr)
783 sk->rcv_saddr = rt->rt_src;
784 sk->daddr = rt->rt_dst;
785 sk->dport = usin->sin_port;
786 sk->state = TCP_ESTABLISHED;
788 sk_dst_set(sk, &rt->u.dst);
789 return(0);
/*
 * udp_disconnect - break the 1003.1g association: return the socket to
 * TCP_CLOSE, clear peer and bound addresses/device, and drop the cached
 * route.  Always succeeds.
 */
792 int udp_disconnect(struct sock *sk, int flags)
795 * 1003.1g - break association.
798 sk->state = TCP_CLOSE;
799 sk->rcv_saddr = 0;
800 sk->daddr = 0;
801 sk->dport = 0;
802 sk->bound_dev_if = 0;
803 sk_dst_reset(sk);
804 return 0;
/*
 * udp_close - release the socket; UDP has no shutdown handshake so the
 * timeout is unused and this is a straight inet_sock_release().
 */
807 static void udp_close(struct sock *sk, long timeout)
809 inet_sock_release(sk);
/*
 * udp_queue_rcv_skb - charge an incoming datagram to the socket's
 * receive queue.  If a socket filter is attached, the checksum must be
 * verified first (the filter reads the payload); a bad checksum or a
 * full receive queue drops the packet, bumps UdpInErrors/IpInDiscards
 * and un-counts the earlier IpInDelivers increment.  Returns 0 on
 * queue, -1 on drop.
 */
812 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
815 * Charge it to the socket, dropping if the queue is full.
818 #if defined(CONFIG_FILTER)
819 if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
820 if (__udp_checksum_complete(skb)) {
821 UDP_INC_STATS_BH(UdpInErrors);
822 IP_INC_STATS_BH(IpInDiscards);
/* Undo the IpInDelivers count made before this drop. */
823 ip_statistics[smp_processor_id()*2].IpInDelivers--;
824 kfree_skb(skb);
825 return -1;
827 skb->ip_summed = CHECKSUM_UNNECESSARY;
829 #endif
831 if (sock_queue_rcv_skb(sk,skb)<0) {
832 UDP_INC_STATS_BH(UdpInErrors);
833 IP_INC_STATS_BH(IpInDiscards);
834 ip_statistics[smp_processor_id()*2].IpInDelivers--;
835 kfree_skb(skb);
836 return -1;
838 UDP_INC_STATS_BH(UdpInDatagrams);
839 return 0;
843 * Multicasts and broadcasts go to each listener.
845 * Note: called only from the BH handler context,
846 * so we don't need to lock the hashes.
/*
 * udp_v4_mcast_deliver - deliver a multicast/broadcast datagram to
 * every matching listener on the destination port's hash chain.
 * Each recipient except the last gets a clone of the skb; the original
 * goes to the final socket (or is freed when nobody matched).
 * Runs in BH context under a read lock on udp_hash_lock.
 */
848 static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
849 u32 saddr, u32 daddr)
851 struct sock *sk;
852 int dif;
854 read_lock(&udp_hash_lock);
855 sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
856 dif = skb->dev->ifindex;
857 sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif);
858 if (sk) {
859 struct sock *sknext = NULL;
861 do {
862 struct sk_buff *skb1 = skb;
/* Look ahead: clone only when another listener follows. */
864 sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
865 uh->source, daddr, dif);
866 if(sknext)
867 skb1 = skb_clone(skb, GFP_ATOMIC);
869 if(skb1)
870 udp_queue_rcv_skb(sk, skb1);
871 sk = sknext;
872 } while(sknext);
873 } else
874 kfree_skb(skb);
875 read_unlock(&udp_hash_lock);
876 return 0;
879 /* Initialize UDP checksum. If exited with zero value (success),
880 * CHECKSUM_UNNECESSARY means, that no more checks are required.
881 * Otherwise, csum completion requires checksumming packet body,
882 * including udp header and folding it to skb->csum.
/*
 * udp_checksum_init - set up checksum state for a received datagram.
 * check == 0 means the sender sent no checksum (legal for UDP): mark
 * CHECKSUM_UNNECESSARY.  With hardware checksumming (CHECKSUM_HW),
 * verify against the pseudo-header now: return -1 on mismatch, else
 * mark done.  Otherwise seed skb->csum with the pseudo-header sum so
 * __udp_checksum_complete() can finish lazily.  Returns 0 on success.
 */
884 static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
885 unsigned short ulen, u32 saddr, u32 daddr)
887 if (uh->check == 0) {
888 skb->ip_summed = CHECKSUM_UNNECESSARY;
889 } else if (skb->ip_summed == CHECKSUM_HW) {
890 if (udp_check(uh, ulen, saddr, daddr, skb->csum))
891 return -1;
892 skb->ip_summed = CHECKSUM_UNNECESSARY;
893 } else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
894 skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
895 /* Probably, we should checksum udp header (it should be in cache
896 * in any case) and data in tiny packets (< rx copybreak).
898 return 0;
902 * All we need to do is get the socket, and then do a checksum.
/*
 * udp_rcv - main UDP receive entry point from the IP layer.
 *
 * Validates the UDP length field against the IP payload, trims the skb
 * to the UDP length, initializes checksum state, then dispatches:
 * broadcast/multicast goes to udp_v4_mcast_deliver(); unicast is
 * looked up and queued on the matching socket.  With no listener, a
 * bad checksum is dropped silently (RFC 1122 4.1.3.4); otherwise an
 * ICMP port-unreachable is sent (RFC 1122 4.1.3.1).  Always returns 0.
 */
905 int udp_rcv(struct sk_buff *skb, unsigned short len)
907 struct sock *sk;
908 struct udphdr *uh;
909 unsigned short ulen;
910 struct rtable *rt = (struct rtable*)skb->dst;
911 u32 saddr = skb->nh.iph->saddr;
912 u32 daddr = skb->nh.iph->daddr;
915 * Get the header.
918 uh = skb->h.uh;
919 __skb_pull(skb, skb->h.raw - skb->data);
921 IP_INC_STATS_BH(IpInDelivers);
924 * Validate the packet and the UDP length.
927 ulen = ntohs(uh->len);
/* Length claimed by the UDP header must fit in the IP payload and
cover at least the UDP header itself. */
929 if (ulen > len || ulen < sizeof(*uh)) {
930 NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
931 UDP_INC_STATS_BH(UdpInErrors);
932 kfree_skb(skb);
933 return(0);
935 skb_trim(skb, ulen);
937 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
938 goto csum_error;
940 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
941 return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
943 sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
945 if (sk != NULL) {
946 udp_queue_rcv_skb(sk, skb);
947 sock_put(sk);
948 return 0;
951 /* No socket. Drop packet silently, if checksum is wrong */
952 if (udp_checksum_complete(skb))
953 goto csum_error;
955 UDP_INC_STATS_BH(UdpNoPorts);
956 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
959 * Hmm. We got an UDP packet to a port to which we
960 * don't wanna listen. Ignore it.
962 kfree_skb(skb);
963 return(0);
965 csum_error:
967 * RFC1122: OK. Discards the bad packet silently (as far as
968 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
970 NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
971 NIPQUAD(saddr),
972 ntohs(uh->source),
973 NIPQUAD(daddr),
974 ntohs(uh->dest),
975 ulen));
976 UDP_INC_STATS_BH(UdpInErrors);
977 kfree_skb(skb);
978 return(0);
/*
 * get_udp_sock - format one UDP socket as a /proc/net/udp row into
 * tmpbuf (slot index i): addresses/ports in hex, state, queue sizes,
 * pending-timer info, owner uid, inode, and refcount.
 */
981 static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
983 unsigned int dest, src;
984 __u16 destp, srcp;
985 int timer_active;
986 unsigned long timer_expires;
988 dest = sp->daddr;
989 src = sp->rcv_saddr;
990 destp = ntohs(sp->dport);
991 srcp = ntohs(sp->sport);
992 timer_active = timer_pending(&sp->timer) ? 2 : 0;
993 timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
994 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
995 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
996 i, src, srcp, dest, destp, sp->state,
997 atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
998 timer_active, timer_expires-jiffies, 0,
/* NOTE(review): uid is read unconditionally but the inode access on
the next line guards sp->socket against NULL — looks inconsistent;
verify against the original tree before relying on this. */
999 sp->socket->inode->i_uid, 0,
1000 sp->socket ? sp->socket->inode->i_ino : 0,
1001 atomic_read(&sp->refcnt), sp);
/*
 * udp_get_info - /proc/net/udp read handler (classic procfs paging).
 * Emits a 128-byte header row, then one fixed-width 128-byte row per
 * IPv4 UDP socket in the hash table, honouring the offset/length
 * window and setting *start for the caller.  Returns bytes produced.
 */
1004 int udp_get_info(char *buffer, char **start, off_t offset, int length)
1006 int len = 0, num = 0, i;
1007 off_t pos = 0;
1008 off_t begin;
1009 char tmpbuf[129];
1011 if (offset < 128)
1012 len += sprintf(buffer, "%-127s\n",
1013 " sl local_address rem_address st tx_queue "
1014 "rx_queue tr tm->when retrnsmt uid timeout inode");
1015 pos = 128;
1016 read_lock(&udp_hash_lock);
1017 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
1018 struct sock *sk;
1020 for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
1021 if (sk->family != PF_INET)
1022 continue;
/* Each row is exactly 128 bytes, so skipping is pure arithmetic. */
1023 pos += 128;
1024 if (pos < offset)
1025 continue;
1026 get_udp_sock(sk, tmpbuf, i);
1027 len += sprintf(buffer+len, "%-127s\n", tmpbuf);
1028 if(len >= length)
1029 goto out;
1032 out:
1033 read_unlock(&udp_hash_lock);
1034 begin = len - (pos - offset);
1035 *start = buffer + begin;
1036 len -= begin;
1037 if(len > length)
1038 len = length;
1039 if (len < 0)
1040 len = 0;
1041 return len;
1044 struct proto udp_prot = {
1045 name: "UDP",
1046 close: udp_close,
1047 connect: udp_connect,
1048 disconnect: udp_disconnect,
1049 ioctl: udp_ioctl,
1050 setsockopt: ip_setsockopt,
1051 getsockopt: ip_getsockopt,
1052 sendmsg: udp_sendmsg,
1053 recvmsg: udp_recvmsg,
1054 backlog_rcv: udp_queue_rcv_skb,
1055 hash: udp_v4_hash,
1056 unhash: udp_v4_unhash,
1057 get_port: udp_v4_get_port,