Linux 2.2.0: net/ipv4/ip_output.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	$Id: ip_output.c,v 1.64 1999/01/04 20:05:33 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case the packet is not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	Use new route cache
 *		Andi Kleen	:	Fix broken PMTU recovery and remove
 *					some redundant tests.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Andi Kleen	:	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently abort send instead of failing
 *					with -EPERM.
 */
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/config.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/ip_fw.h>
#include <linux/firewall.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
/*
 *	Shall we try to damage output packets if routing dev changes?
 */
int sysctl_ip_dynaddr = 0;

int ip_id_count = 0;
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
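/*
 * Added note (not in the original file): verification is the mirror image
 * of this.  Because iph->check is the one's-complement of the sum of the
 * rest of the header, summing the whole header again with the check field
 * left in place yields zero for an intact header, which is how the receive
 * path in ip_input.c checks incoming datagrams, roughly:
 *
 *	if (ip_fast_csum((unsigned char *)iph, iph->ihl) != 0)
 *		goto drop;		(header is corrupt)
 */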
/*
 *	Add an ip header to a skbuff and send it out.
 */
void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			   u32 saddr, u32 daddr, struct ip_options *opt)
{
	struct rtable *rt = (struct rtable *)skb->dst;
	struct iphdr *iph;
	struct device *dev;

	/* Build the IP header. */
	if (opt)
		iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr) + opt->optlen);
	else
		iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

	iph->version = 4;
	iph->ihl = 5;
	iph->tos = sk->ip_tos;
	iph->frag_off = 0;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off |= htons(IP_DF);
	iph->ttl = sk->ip_ttl;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->protocol = sk->protocol;
	iph->tot_len = htons(skb->len);
	iph->id = htons(ip_id_count++);
	skb->nh.iph = iph;

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}

	dev = rt->u.dst.dev;

#ifdef CONFIG_FIREWALL
	if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT)
		goto drop;
#endif

	ip_send_check(iph);

	/* Send it out. */
	skb->dst->output(skb);
	return;

#ifdef CONFIG_FIREWALL
drop:
	kfree_skb(skb);
#endif
}
int __ip_finish_output(struct sk_buff *skb)
{
	return ip_finish_output(skb);
}
int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = (struct rtable*)skb->dst;
	struct device *dev = rt->u.dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */

	ip_statistics.IpOutRequests++;
#ifdef CONFIG_IP_ROUTE_NAT
	if (rt->rt_flags & RTCF_NAT)
		ip_do_nat(skb);
#endif

	skb->dev = dev;
	skb->protocol = __constant_htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) {
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loop back non-local frames
		   that came back after forwarding; ip_mr_input will drop
		   them anyway.
		   Note that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
			dev_loopback_xmit(skb);

		/* Multicasts with ttl 0 must not go beyond the host */

		if (skb->nh.iph->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST)
		dev_loopback_xmit(skb);

	return ip_finish_output(skb);
}
int ip_output(struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_NAT
	struct rtable *rt = (struct rtable*)skb->dst;
#endif

	ip_statistics.IpOutRequests++;

#ifdef CONFIG_IP_ROUTE_NAT
	if (rt->rt_flags&RTCF_NAT)
		ip_do_nat(skb);
#endif

	return ip_finish_output(skb);
}
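/*
 * Added note (not in the original file): nothing calls ip_output() or
 * ip_mc_output() directly.  The IPv4 routing code installs one of them as
 * rt->u.dst.output when the route is created (ip_mc_output for multicast
 * and broadcast routes), and senders such as the functions above and below
 * simply invoke skb->dst->output(skb).
 */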
/* Queues a packet to be sent, and starts the transmitter if necessary.
 * This routine also needs to put in the total length and compute the
 * checksum.  We used to do this in two stages, ip_build_header() then
 * this, but that scheme created a mess when routes disappeared etc.
 * So we do it all here, and the TCP send engine has been changed to
 * match. (No more unroutable FIN disasters, etc. wheee...)  This will
 * most likely make other reliable transport layers above IP easier
 * to implement under Linux.
 */
void ip_queue_xmit(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct ip_options *opt = sk->opt;
	struct rtable *rt;
	struct device *dev;
	struct iphdr *iph;
	unsigned int tot_len;

	/* Make sure we can route this packet. */
	rt = (struct rtable *) sk->dst_cache;
	if(rt == NULL || rt->u.dst.obsolete) {
		u32 daddr;

		sk->dst_cache = NULL;
		ip_rt_put(rt);

		/* Use correct destination address if we have options. */
		daddr = sk->daddr;
		if(opt && opt->srr)
			daddr = opt->faddr;

		/* If this fails, the retransmit mechanism of the transport layer
		 * will keep trying until the route appears or the connection times
		 * itself out.
		 */
		if(ip_route_output(&rt, daddr, sk->saddr,
				   RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute,
				   sk->bound_dev_if))
			goto drop;
		sk->dst_cache = &rt->u.dst;
	}

	if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* We have a route, so grab a reference. */
	skb->dst = dst_clone(sk->dst_cache);

	/* OK, we know where to send it, allocate and build IP header. */
	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	iph->version = 4;
	iph->ihl = 5;
	iph->tos = sk->ip_tos;
	iph->frag_off = 0;
	iph->ttl = sk->ip_ttl;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->protocol = sk->protocol;
	skb->nh.iph = iph;
	/* Transport layer sets skb->h.foo itself. */

	if(opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, sk->daddr, rt, 0);
	}

	tot_len = skb->len;
	iph->tot_len = htons(tot_len);
	iph->id = htons(ip_id_count++);

	dev = rt->u.dst.dev;

#ifdef CONFIG_FIREWALL
	if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT)
		goto drop;
#endif

	/* This can happen when the transport layer has segments queued
	 * with a cached route, and by the time we get here things are
	 * re-routed to a device with a different MTU than the original
	 * device.  Sick, but we must cover it.
	 */
	if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
		kfree_skb(skb);
		if (skb2 == NULL)
			return;
		if (sk)
			skb_set_owner_w(skb2, sk);
		skb = skb2;
		iph = skb->nh.iph;
	}

	/* Do we need to fragment?  Again this is inefficient.  We
	 * need to somehow lock the original buffer and use bits of it.
	 */
	if (tot_len > rt->u.dst.pmtu)
		goto fragment;

	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off |= __constant_htons(IP_DF);

	/* Add an IP checksum. */
	ip_send_check(iph);

	skb->priority = sk->priority;
	skb->dst->output(skb);
	return;

fragment:
	if (ip_dont_fragment(sk, &rt->u.dst) &&
	    tot_len > (iph->ihl<<2) + sizeof(struct tcphdr)+16) {
		/* Reject the packet ONLY if TCP might fragment
		   it itself, if we were careful enough.
		   The test is not precise (f.e. it does not take sacks
		   into account). Actually, tcp should do this. --ANK (980801)
		 */
		iph->frag_off |= __constant_htons(IP_DF);
		printk(KERN_DEBUG "sending pkt_too_big to self\n");
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(rt->u.dst.pmtu));
		goto drop;
	}
	ip_fragment(skb, skb->dst->output);
	return;

no_route:
	sk->dst_cache = NULL;
	ip_rt_put(rt);
	ip_statistics.IpOutNoRoutes++;
	/* Fall through... */
drop:
	kfree_skb(skb);
}
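/*
 * Illustrative sketch (added, not part of the original file): the calling
 * convention ip_queue_xmit() expects.  TCP is its real user; my_proto_xmit()
 * and MY_HDR_LEN below are hypothetical names.  The caller reserves headroom
 * for the link-layer and IP headers, builds its own transport header, and
 * leaves skb->sk pointing at the sending socket, since ip_queue_xmit() takes
 * the route, tos, ttl and IP options from there.
 */
#if 0
static void my_proto_xmit(struct sock *sk, const void *data, unsigned int len)
{
	struct sk_buff *skb;

	skb = alloc_skb(MAX_HEADER + sizeof(struct iphdr) + MY_HDR_LEN + len,
			GFP_ATOMIC);
	if (skb == NULL)
		return;
	skb_set_owner_w(skb, sk);			/* sets skb->sk, charges the socket */
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	skb->h.raw = skb_put(skb, MY_HDR_LEN + len);	/* transport header + payload */
	/* ... fill in the transport header, copy the payload ... */

	ip_queue_xmit(skb);	/* pushes the IP header and hands off to the route */
}
#endif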
/*
 *	Build and send a packet, with as little as one copy
 *
 *	Doesn't care much about ip options... option length can be
 *	different for fragment at 0 and other fragments.
 *
 *	Note that the fragment at the highest offset is sent first,
 *	so the getfrag routine can fill in the TCP/UDP checksum header
 *	field in the last fragment it sends... actually it also helps
 *	the reassemblers, they can put most packets in at the head of
 *	the fragment queue, and they know the total size in advance. This
 *	last feature will measurably improve the Linux fragment handler one
 *	day.
 *
 *	The callback has four args: an arbitrary pointer (copy of frag),
 *	the destination buffer to copy into (char *), the offset to copy
 *	from, and the length to be copied.  (An illustrative sketch of such
 *	a callback appears after ip_build_xmit() below.)
 */
int ip_build_xmit_slow(struct sock *sk,
		       int getfrag (const void *,
				    char *,
				    unsigned int,
				    unsigned int),
		       const void *frag,
		       unsigned length,
		       struct ipcm_cookie *ipc,
		       struct rtable *rt,
		       int flags)
{
	unsigned int fraglen, maxfraglen, fragheaderlen;
	int err;
	int offset, mf;
	int mtu;
	unsigned short id;

	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
	int nfrags=0;
	struct ip_options *opt = ipc->opt;
	int df = 0;

	mtu = rt->u.dst.pmtu;
	if (ip_dont_fragment(sk, &rt->u.dst))
		df = htons(IP_DF);

	if (!sk->ip_hdrincl)
		length -= sizeof(struct iphdr);

	if (opt) {
		fragheaderlen = sizeof(struct iphdr) + opt->optlen;
		maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
	} else {
		fragheaderlen = sk->ip_hdrincl ? 0 : sizeof(struct iphdr);

		/*
		 *	Fragheaderlen is the size of 'overhead' on each buffer. Now work
		 *	out the size of the frames to send.
		 */

		maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
	}

	if (length + fragheaderlen > 0xFFFF) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
		return -EMSGSIZE;
	}

	/*
	 *	Start at the end of the frame by handling the remainder.
	 */

	offset = length - (length % (maxfraglen - fragheaderlen));

	/*
	 *	Amount of memory to allocate for final fragment.
	 */

	fraglen = length - offset + fragheaderlen;

	if (length-offset==0) {
		fraglen = maxfraglen;
		offset -= maxfraglen-fragheaderlen;
	}

	/*
	 *	The last fragment will not have MF (more fragments) set.
	 */

	mf = 0;

	/*
	 *	Don't fragment packets for path mtu discovery.
	 */

	if (offset > 0 && df) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
		return(-EMSGSIZE);
	}

	/*
	 *	Lock the device lists.
	 */

	dev_lock_list();

	/*
	 *	Get an identifier
	 */

	id = htons(ip_id_count++);

	/*
	 *	Begin outputting the bytes.
	 */

	do {
		int error;
		char *data;
		struct sk_buff * skb;

		/*
		 *	Get the memory we require with some space left for alignment.
		 */

		skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &error);
		if (skb == NULL) {
			ip_statistics.IpOutDiscards++;
			if(nfrags>1)
				ip_statistics.IpFragCreates++;
			dev_unlock_list();
			return(error);
		}

		/*
		 *	Fill in the control structures
		 */

		skb->priority = sk->priority;
		skb->dst = dst_clone(&rt->u.dst);
		skb_reserve(skb, hh_len);

		/*
		 *	Find where to start putting bytes.
		 */

		data = skb_put(skb, fraglen);
		skb->nh.iph = (struct iphdr *)data;

		/*
		 *	Only write IP header onto non-raw packets
		 */

		if(!sk->ip_hdrincl) {
			struct iphdr *iph = (struct iphdr *)data;

			iph->version = 4;
			iph->ihl = 5;
			if (opt) {
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt,
						 ipc->addr, rt, offset);
			}
			iph->tos = sk->ip_tos;
			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
			iph->id = id;
			iph->frag_off = htons(offset>>3);
			iph->frag_off |= mf|df;
			if (rt->rt_type == RTN_MULTICAST)
				iph->ttl = sk->ip_mc_ttl;
			else
				iph->ttl = sk->ip_ttl;
			iph->protocol = sk->protocol;
			iph->check = 0;
			iph->saddr = rt->rt_src;
			iph->daddr = rt->rt_dst;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			data += iph->ihl*4;

			/*
			 *	Any further fragments will have MF set.
			 */

			mf = htons(IP_MF);
		}

		/*
		 *	User data callback
		 */

		err = 0;
		if (getfrag(frag, data, offset, fraglen-fragheaderlen))
			err = -EFAULT;

		/*
		 *	Account for the fragment.
		 */

#ifdef CONFIG_FIREWALL
		if(!err) {
			int fw_res;

			fw_res = call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb);
			if(fw_res == FW_QUEUE) {
				kfree_skb(skb);
				skb = NULL;
			} else if(fw_res < FW_ACCEPT) {
				err = -EPERM;
			}
		}
#endif

		if (err) {
			ip_statistics.IpOutDiscards++;
			kfree_skb(skb);
			dev_unlock_list();
			return err;
		}

		offset -= (maxfraglen-fragheaderlen);
		fraglen = maxfraglen;

		nfrags++;

		err = 0;
		if (skb && rt->u.dst.output(skb)) {
			err = -ENETDOWN;
			ip_statistics.IpOutDiscards++;
			break;
		}
	} while (offset >= 0);

	if (nfrags>1)
		ip_statistics.IpFragCreates += nfrags;

	dev_unlock_list();
	return err;
}
/*
 *	Fast path for unfragmented packets.
 */
int ip_build_xmit(struct sock *sk,
		  int getfrag (const void *,
			       char *,
			       unsigned int,
			       unsigned int),
		  const void *frag,
		  unsigned length,
		  struct ipcm_cookie *ipc,
		  struct rtable *rt,
		  int flags)
{
	int err;
	struct sk_buff *skb;
	int df;
	struct iphdr *iph;

	/*
	 *	Try the simple case first. This leaves fragmented frames, and by
	 *	choice RAW frames within 20 bytes of maximum size (rare) to the long path
	 */

	if (!sk->ip_hdrincl)
		length += sizeof(struct iphdr);

	/*
	 *	Check for slow path.
	 */
	if (length > rt->u.dst.pmtu || ipc->opt != NULL)
		return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);

	/*
	 *	Do path mtu discovery if needed.
	 */
	df = 0;
	if (ip_dont_fragment(sk, &rt->u.dst))
		df = htons(IP_DF);

	/*
	 *	Fast path for unfragmented frames without options.
	 */
	{
		int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;

		skb = sock_alloc_send_skb(sk, length+hh_len+15,
					  0, flags&MSG_DONTWAIT, &err);
		if(skb==NULL)
			goto error;
		skb_reserve(skb, hh_len);
	}

	skb->priority = sk->priority;
	skb->dst = dst_clone(&rt->u.dst);

	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);

	dev_lock_list();

	if(!sk->ip_hdrincl) {
		iph->version=4;
		iph->ihl=5;
		iph->tos=sk->ip_tos;
		iph->tot_len = htons(length);
		iph->id=htons(ip_id_count++);
		iph->frag_off = df;
		iph->ttl=sk->ip_mc_ttl;
		if (rt->rt_type != RTN_MULTICAST)
			iph->ttl=sk->ip_ttl;
		iph->protocol=sk->protocol;
		iph->saddr=rt->rt_src;
		iph->daddr=rt->rt_dst;
		iph->check=0;
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
		err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
	}
	else
		err = getfrag(frag, (void *)iph, 0, length);

	dev_unlock_list();

	if (err)
		err = -EFAULT;

#ifdef CONFIG_FIREWALL
	if(!err) {
		int fw_res;

		fw_res = call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb);
		if(fw_res == FW_QUEUE) {
			/* re-queued elsewhere; silently abort this send */
			kfree_skb(skb);
			return 0;
		}
		if(fw_res < FW_ACCEPT)
			err = -EPERM;
	}
#endif

	if (err) {
		kfree_skb(skb);
		goto error;
	}

	return rt->u.dst.output(skb);

error:
	ip_statistics.IpOutDiscards++;
	return err;
}
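/*
 * Illustrative sketch (added, not part of the original file): the shape of
 * a getfrag callback as used by ip_build_xmit()/ip_build_xmit_slow().  This
 * hypothetical version just copies out of a flat kernel buffer; the real
 * callers (UDP, raw sockets, ip_reply_glue_bits below) copy from iovecs and
 * may fold a checksum in as they go.  A non-zero return is reported to the
 * caller as -EFAULT.
 */
#if 0
static int example_getfrag(const void *p, char *to,
			   unsigned int offset, unsigned int fraglen)
{
	memcpy(to, (const char *)p + offset, fraglen);
	return 0;
}
#endif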
/*
 *	This IP datagram is too large to be sent in one piece.  Break it up
 *	into smaller pieces (each of size equal to the IP header plus a block
 *	of the data of the original IP data part) that will still fit in a
 *	single device frame, and queue such frames for sending.
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 */
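/*
 * Worked example (added for illustration, not in the original file):
 * a 4000 byte datagram with a 20 byte header carries 3980 bytes of data.
 * With a path MTU of 1500 each fragment may hold at most 1480 data bytes,
 * and every fragment but the last is trimmed to a multiple of 8, so the
 * loop below produces
 *
 *	fragment 0: offset    0, 1480 data bytes, MF set
 *	fragment 1: offset 1480, 1480 data bytes, MF set
 *	fragment 2: offset 2960, 1020 data bytes, MF clear
 *
 * each carrying its own copy of the 20 byte header, with iph->frag_off
 * holding the offset divided by 8.
 */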
void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len;
	int offset;
	int not_last_frag;
	struct rtable *rt = (struct rtable*)skb->dst;

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */

	raw = skb->nh.raw;
	iph = (struct iphdr*)raw;

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	mtu = rt->u.dst.pmtu - hlen;		/* Size of data space */
	ptr = raw + hlen;			/* Where to start from */

	/*
	 *	The protocol doesn't seem to say what to do in the case that the
	 *	frame + options doesn't fit the mtu. As it used to fall down dead
	 *	in this case, we were fortunate it didn't happen.
	 *
	 *	It is impossible, because mtu>=68. --ANK (980801)
	 */

#ifdef CONFIG_NET_PARANOIA
	if (mtu<8)
		goto fail;
#endif

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->pkt_type = skb->pkt_type;
		skb2->priority = skb->priority;
		skb_reserve(skb2, (dev->hard_header_len+15)&~15);
		skb_put(skb2, len + hlen);
		skb2->nh.raw = skb2->data;
		skb2->h.raw = skb2->data + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		skb2->dst = dst_clone(skb->dst);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->nh.raw, raw, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw, ptr, len);
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = skb2->nh.iph;
		iph->frag_off = htons((offset >> 3));

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *	last fragment then keep the MF bit set on each piece
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		output(skb2);
	}

	kfree_skb(skb);
	ip_statistics.IpFragOKs++;
	return;

fail:
	kfree_skb(skb);
	ip_statistics.IpFragFails++;
}
/*
 *	Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
			      unsigned int fraglen)
{
	struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
	u16 *pktp = (u16 *)to;
	struct iovec *iov;
	int len;
	int hdrflag = 1;

	iov = &dp->iov[0];
	if (offset >= iov->iov_len) {
		offset -= iov->iov_len;
		iov++;
		hdrflag = 0;
	}
	len = iov->iov_len - offset;
	if (fraglen > len) { /* overlapping. */
		dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
						     dp->csum);
		offset = 0;
		fraglen -= len;
		to += len;
		iov++;
	}

	dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
					     dp->csum);

	if (hdrflag && dp->csumoffset)
		*(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
	return 0;
}
/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct {
		struct ip_options opt;
		char data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	u32 daddr;
	struct rtable *rt = (struct rtable*)skb->dst;

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	sk->ip_tos = skb->nh.iph->tos;
	sk->priority = skb->priority;
	sk->protocol = skb->nh.iph->protocol;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = &replyopts.opt;

	if (ipc.opt->srr)
		daddr = replyopts.opt.faddr;
	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
		return;

	/* And let IP do all the hard work. */
	ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
	ip_rt_put(rt);
}
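/*
 * Illustrative sketch (added, not part of the original file): roughly how a
 * caller such as the TCP reset code is expected to drive ip_send_reply().
 * It builds the reply transport header in a local buffer, primes arg.csum
 * with a partial checksum over that header, and tells ip_reply_glue_bits()
 * where (in 16-bit words) to fold the finished checksum back in.
 */
#if 0
static void example_send_reply(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr th;
	struct ip_reply_arg arg;

	/* ... fill in th with the reply TCP header ... */

	arg.iov[0].iov_base = (unsigned char *)&th;
	arg.iov[0].iov_len  = sizeof(th);
	arg.csum = csum_partial((char *)&th, sizeof(th), 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(sk, skb, &arg, sizeof(th));
}
#endif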
/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type =
{
	__constant_htons(ETH_P_IP),
	NULL,	/* All devices */
	ip_rcv,
	NULL,
	NULL,
};

#ifdef CONFIG_PROC_FS
#ifdef CONFIG_IP_MULTICAST
static struct proc_dir_entry proc_net_igmp = {
	PROC_NET_IGMP, 4, "igmp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	ip_mc_procinfo
};
#endif
#endif
/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

__initfunc(void ip_init(void))
{
	dev_add_pack(&ip_packet_type);

	ip_rt_init();

#ifdef CONFIG_PROC_FS
#ifdef CONFIG_IP_MULTICAST
	proc_net_register(&proc_net_igmp);
#endif
#endif
}