/* net/ipv4/ip_output.c -- from davej-history.git, import of 2.2.5pre2 */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
 * Version:	$Id: ip_output.c,v 1.67 1999/03/25 00:43:00 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
 *		Bradford Johnson:	Fix faulty handling of some frames when
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case the packet is not accepted by
 *					the output firewall rules).
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen	:	Fix broken PMTU recovery and remove
 *					some redundant tests.
 *		Vitaly E. Lavrov:	Transparent proxy revived after year coma.
 *		Andi Kleen	:	Replace ip_reply with ip_send_reply.
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 */
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/config.h>

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <linux/igmp.h>
#include <linux/ip_fw.h>
#include <linux/firewall.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
/*
 *	Shall we try to damage output packets if routing dev changes?
 */

int sysctl_ip_dynaddr = 0;

int ip_id_count = 0;
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
/*
 *	Add an ip header to a skbuff and send it out.
 */
void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			   u32 saddr, u32 daddr, struct ip_options *opt)
{
	struct rtable *rt = (struct rtable *)skb->dst;
	struct iphdr *iph;
	struct device *dev;

	/* Build the IP header. */
	if (opt)
		iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr) + opt->optlen);
	else
		iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

	iph->version = 4;
	iph->ihl = 5;
	iph->tos = sk->ip_tos;
	iph->frag_off = 0;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off |= htons(IP_DF);
	iph->ttl = sk->ip_ttl;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->protocol = sk->protocol;
	iph->tot_len = htons(skb->len);
	iph->id = htons(ip_id_count++);
	skb->nh.iph = iph;

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}

	dev = rt->u.dst.dev;

#ifdef CONFIG_FIREWALL
	/* Now we have no better mechanism to notify about error. */
	switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
	case FW_REJECT:
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		/* Fall thru... */
	case FW_BLOCK:
	case FW_QUEUE:
		kfree_skb(skb);
		return;
	}
#endif

	ip_send_check(iph);

	/* Send it out. */
	skb->dst->output(skb);
	return;
}
int __ip_finish_output(struct sk_buff *skb)
{
	return ip_finish_output(skb);
}
int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = (struct rtable*)skb->dst;
	struct device *dev = rt->u.dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */

	ip_statistics.IpOutRequests++;
#ifdef CONFIG_IP_ROUTE_NAT
	if (rt->rt_flags & RTCF_NAT)
		ip_do_nat(skb);
#endif

	skb->dev = dev;
	skb->protocol = __constant_htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) {
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loop back non-local frames,
		   which returned after forwarding; they will be dropped
		   by ip_mr_input in any case.
		   Note that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
		if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
			dev_loopback_xmit(skb);

		/* Multicasts with ttl 0 must not go beyond the host */

		if (skb->nh.iph->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST)
		dev_loopback_xmit(skb);

	return ip_finish_output(skb);
}
int ip_output(struct sk_buff *skb)
{
#ifdef CONFIG_IP_ROUTE_NAT
	struct rtable *rt = (struct rtable*)skb->dst;
#endif

	ip_statistics.IpOutRequests++;

#ifdef CONFIG_IP_ROUTE_NAT
	if (rt->rt_flags&RTCF_NAT)
		ip_do_nat(skb);
#endif

	return ip_finish_output(skb);
}
/* Queues a packet to be sent, and starts the transmitter if necessary.
 * This routine also needs to put in the total length and compute the
 * checksum.  We used to do this in two stages, ip_build_header() then
 * this, but that scheme created a mess when routes disappeared etc.
 * So we do it all here, and the TCP send engine has been changed to
 * match.  (No more unroutable FIN disasters, etc. wheee...)  This will
 * most likely make other reliable transport layers above IP easier
 * to implement under Linux.
 */
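/*
 * Editor's note: an illustrative sketch (not part of the original file) of
 * how a transport protocol might hand a fully built segment to
 * ip_queue_xmit().  The names my_proto_xmit and MY_HDR_LEN are hypothetical;
 * in this tree the real caller is the TCP output path.
 */
#if 0
static void my_proto_xmit(struct sock *sk, const void *payload, unsigned int len)
{
	struct sk_buff *skb;

	/* Leave headroom for the IP and link-layer headers that will be
	 * pushed in front of the transport header later on. */
	skb = alloc_skb(MAX_HEADER + MY_HDR_LEN + len, GFP_KERNEL);
	if (skb == NULL)
		return;
	skb_set_owner_w(skb, sk);	/* ip_queue_xmit() reads skb->sk */
	skb_reserve(skb, MAX_HEADER);

	/* Transport header plus payload; the transport layer sets skb->h.foo
	 * itself, as noted in ip_queue_xmit() below. */
	skb->h.raw = skb_put(skb, MY_HDR_LEN + len);
	memcpy(skb->h.raw + MY_HDR_LEN, payload, len);

	ip_queue_xmit(skb);
}
#endif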
void ip_queue_xmit(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct ip_options *opt = sk->opt;
	struct rtable *rt;
	struct device *dev;
	struct iphdr *iph;
	unsigned int tot_len;

	/* Make sure we can route this packet. */
	rt = (struct rtable *) sk->dst_cache;
	if(rt == NULL || rt->u.dst.obsolete) {
		u32 daddr;

		sk->dst_cache = NULL;
		ip_rt_put(rt);

		/* Use correct destination address if we have options. */
		daddr = sk->daddr;
		if(opt && opt->srr)
			daddr = opt->faddr;

		/* If this fails, retransmit mechanism of transport layer will
		 * keep trying until route appears or the connection times itself
		 * out.
		 */
		if(ip_route_output(&rt, daddr, sk->saddr,
				   RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute,
				   sk->bound_dev_if))
			goto drop;
		sk->dst_cache = &rt->u.dst;
	}

	if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* We have a route, so grab a reference. */
	skb->dst = dst_clone(sk->dst_cache);

	/* OK, we know where to send it, allocate and build IP header. */
	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	iph->version = 4;
	iph->ihl = 5;
	iph->tos = sk->ip_tos;
	iph->frag_off = 0;
	iph->ttl = sk->ip_ttl;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->protocol = sk->protocol;
	skb->nh.iph = iph;
	/* Transport layer sets skb->h.foo itself. */

	if(opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, sk->daddr, rt, 0);
	}

	tot_len = skb->len;
	iph->tot_len = htons(tot_len);
	iph->id = htons(ip_id_count++);

	dev = rt->u.dst.dev;

#ifdef CONFIG_FIREWALL
	/* Now we have no better mechanism to notify about error. */
	switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
	case FW_REJECT:
		start_bh_atomic();
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		end_bh_atomic();
		/* Fall thru... */
	case FW_BLOCK:
	case FW_QUEUE:
		goto drop;
	}
#endif

	/* This can happen when the transport layer has segments queued
	 * with a cached route, and by the time we get here things are
	 * re-routed to a device with a different MTU than the original
	 * device.  Sick, but we must cover it.
	 */
	if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
		kfree_skb(skb);
		if (skb2 == NULL)
			return;
		if (sk)
			skb_set_owner_w(skb2, sk);
		skb = skb2;
		iph = skb->nh.iph;
	}

	/* Do we need to fragment.  Again this is inefficient.  We
	 * need to somehow lock the original buffer and use bits of it.
	 */
	if (tot_len > rt->u.dst.pmtu)
		goto fragment;

	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off |= __constant_htons(IP_DF);

	/* Add an IP checksum. */
	ip_send_check(iph);

	skb->priority = sk->priority;
	skb->dst->output(skb);
	return;

fragment:
	if (ip_dont_fragment(sk, &rt->u.dst) &&
	    tot_len > (iph->ihl<<2) + sizeof(struct tcphdr)+16) {
		/* Reject packet ONLY if TCP might fragment
		   it itself, if we were careful enough.
		   Test is not precise (f.e. it does not take sacks
		   into account). Actually, tcp should make it. --ANK (980801)
		 */
		iph->frag_off |= __constant_htons(IP_DF);
		NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n"));

		/* icmp_send is not reentrant, so that bh_atomic... --ANK */
		start_bh_atomic();
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(rt->u.dst.pmtu));
		end_bh_atomic();
		goto drop;
	}
	ip_fragment(skb, skb->dst->output);
	return;

no_route:
	sk->dst_cache = NULL;
	ip_rt_put(rt);
	ip_statistics.IpOutNoRoutes++;
	/* Fall through... */
drop:
	kfree_skb(skb);
}
/*
 *	Build and send a packet, with as little as one copy
 *
 *	Doesn't care much about ip options... option length can be
 *	different for fragment at 0 and other fragments.
 *
 *	Note that the fragment at the highest offset is sent first,
 *	so the getfrag routine can fill in the TCP/UDP checksum header
 *	field in the last fragment it sends... actually it also helps
 *	the reassemblers, they can put most packets in at the head of
 *	the fragment queue, and they know the total size in advance. This
 *	last feature will measurably improve the Linux fragment handler one
 *	day.
 *
 *	The callback has four args: an arbitrary pointer (copy of frag),
 *	the destination buffer to copy into (char *), the offset to copy
 *	from, and the length to be copied.
 */
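/*
 * Editor's note: an illustrative sketch (not part of the original file) of a
 * minimal getfrag callback, assuming 'frag' points at a flat kernel buffer.
 * The struct and function names are hypothetical; the real callbacks in this
 * tree copy from user iovecs and/or fold in a checksum as they go (see
 * ip_reply_glue_bits() later in this file).
 */
#if 0
struct flat_frag {
	const char *buf;	/* start of the complete payload */
};

static int example_getfrag(const void *p, char *to,
			   unsigned int offset, unsigned int fraglen)
{
	const struct flat_frag *ff = (const struct flat_frag *)p;

	/* Copy 'fraglen' bytes of payload, starting 'offset' bytes into the
	 * datagram, into the fragment being assembled; a non-zero return
	 * makes ip_build_xmit_slow() fail the send with -EFAULT. */
	memcpy(to, ff->buf + offset, fraglen);
	return 0;
}
#endif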
int ip_build_xmit_slow(struct sock *sk,
		  int getfrag (const void *,
			       char *,
			       unsigned int,
			       unsigned int),
		  const void *frag,
		  unsigned length,
		  struct ipcm_cookie *ipc,
		  struct rtable *rt,
		  int flags)
{
	unsigned int fraglen, maxfraglen, fragheaderlen;
	int err;
	int offset, mf;
	int mtu;
	unsigned short id;

	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
	int nfrags=0;
	struct ip_options *opt = ipc->opt;
	int df = 0;

	mtu = rt->u.dst.pmtu;
	if (ip_dont_fragment(sk, &rt->u.dst))
		df = htons(IP_DF);

	length -= sizeof(struct iphdr);

	if (opt) {
		fragheaderlen = sizeof(struct iphdr) + opt->optlen;
		maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
	} else {
		fragheaderlen = sizeof(struct iphdr);

		/*
		 *	Fragheaderlen is the size of 'overhead' on each buffer. Now work
		 *	out the size of the frames to send.
		 */

		maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
	}

	if (length + fragheaderlen > 0xFFFF) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
		return -EMSGSIZE;
	}

	/*
	 *	Start at the end of the frame by handling the remainder.
	 */

	offset = length - (length % (maxfraglen - fragheaderlen));

	/*
	 *	Amount of memory to allocate for final fragment.
	 */

	fraglen = length - offset + fragheaderlen;

	if (length-offset==0) {
		fraglen = maxfraglen;
		offset -= maxfraglen-fragheaderlen;
	}

	/*
	 *	The last fragment will not have MF (more fragments) set.
	 */

	mf = 0;

	/*
	 *	Don't fragment packets for path mtu discovery.
	 */

	if (offset > 0 && df) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
		return(-EMSGSIZE);
	}
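	/*
	 * Editor's note: worked example (not part of the original file).
	 * With no options and a path MTU of 1500: fragheaderlen = 20 and
	 * maxfraglen = ((1500-20) & ~7) + 20 = 1500, i.e. 1480 data bytes
	 * per fragment.  For a 4008-byte payload (length = 4008 after the
	 * header subtraction above): offset = 4008 - (4008 % 1480) = 2960
	 * and fraglen = 4008 - 2960 + 20 = 1068.  The loop below then emits
	 * the fragments from the tail forward: (offset 2960, 1048 data
	 * bytes, MF clear), (1480, 1480, MF set), (0, 1480, MF set), and
	 * stops once offset goes negative.
	 */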
	/*
	 *	Lock the device lists.
	 */

	dev_lock_list();

	/*
	 *	Get an identifier
	 */

	id = htons(ip_id_count++);

	/*
	 *	Begin outputting the bytes.
	 */

	do {
		char *data;
		struct sk_buff * skb;

		/*
		 *	Get the memory we require with some space left for alignment.
		 */

		skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &err);
		if (skb == NULL)
			goto error;

		/*
		 *	Fill in the control structures
		 */

		skb->priority = sk->priority;
		skb->dst = dst_clone(&rt->u.dst);
		skb_reserve(skb, hh_len);

		/*
		 *	Find where to start putting bytes.
		 */

		data = skb_put(skb, fraglen);
		skb->nh.iph = (struct iphdr *)data;

		/*
		 *	Only write IP header onto non-raw packets
		 */

		{
			struct iphdr *iph = (struct iphdr *)data;

			iph->version = 4;
			iph->ihl = 5;
			if (opt) {
				iph->ihl += opt->optlen>>2;
				ip_options_build(skb, opt,
						 ipc->addr, rt, offset);
			}
			iph->tos = sk->ip_tos;
			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
			iph->id = id;
			iph->frag_off = htons(offset>>3);
			iph->frag_off |= mf|df;
			if (rt->rt_type == RTN_MULTICAST)
				iph->ttl = sk->ip_mc_ttl;
			else
				iph->ttl = sk->ip_ttl;
			iph->protocol = sk->protocol;
			iph->check = 0;
			iph->saddr = rt->rt_src;
			iph->daddr = rt->rt_dst;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
			data += iph->ihl*4;

			/*
			 *	Any further fragments will have MF set.
			 */

			mf = htons(IP_MF);
		}

		/*
		 *	User data callback
		 */

		if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
			err = -EFAULT;
			kfree_skb(skb);
			goto error;
		}

		offset -= (maxfraglen-fragheaderlen);
		fraglen = maxfraglen;

		nfrags++;

#ifdef CONFIG_FIREWALL
		switch (call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb)) {
		case FW_QUEUE:
			kfree_skb(skb);
			continue;
		case FW_BLOCK:
		case FW_REJECT:
			kfree_skb(skb);
			err = -EPERM;
			goto error;
		}
#endif

		err = -ENETDOWN;
		if (rt->u.dst.output(skb))
			goto error;
	} while (offset >= 0);

	if (nfrags>1)
		ip_statistics.IpFragCreates += nfrags;

	dev_unlock_list();
	return 0;

error:
	ip_statistics.IpOutDiscards++;
	if (nfrags>1)
		ip_statistics.IpFragCreates += nfrags;
	dev_unlock_list();
	return err;
}
/*
 *	Fast path for unfragmented packets.
 */
int ip_build_xmit(struct sock *sk,
		  int getfrag (const void *,
			       char *,
			       unsigned int,
			       unsigned int),
		  const void *frag,
		  unsigned length,
		  struct ipcm_cookie *ipc,
		  struct rtable *rt,
		  int flags)
{
	int err;
	struct sk_buff *skb;
	int df;
	struct iphdr *iph;

	/*
	 *	Try the simple case first. This leaves fragmented frames, and by
	 *	choice RAW frames within 20 bytes of maximum size (rare) to the long path
	 */

	if (!sk->ip_hdrincl) {
		length += sizeof(struct iphdr);

		/*
		 *	Check for slow path.
		 */
		if (length > rt->u.dst.pmtu || ipc->opt != NULL)
			return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
	} else {
		if (length > rt->u.dst.dev->mtu) {
			ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
			return -EMSGSIZE;
		}
	}

	/*
	 *	Do path mtu discovery if needed.
	 */
	df = 0;
	if (ip_dont_fragment(sk, &rt->u.dst))
		df = htons(IP_DF);

	/*
	 *	Fast path for unfragmented frames without options.
	 */
	{
	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;

	skb = sock_alloc_send_skb(sk, length+hh_len+15,
				  0, flags&MSG_DONTWAIT, &err);
	if(skb==NULL)
		goto error;
	skb_reserve(skb, hh_len);
	}

	skb->priority = sk->priority;
	skb->dst = dst_clone(&rt->u.dst);

	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);

	dev_lock_list();

	if(!sk->ip_hdrincl) {
		iph->version=4;
		iph->ihl=5;
		iph->tos=sk->ip_tos;
		iph->tot_len = htons(length);
		iph->id=htons(ip_id_count++);
		iph->frag_off = df;
		iph->ttl=sk->ip_mc_ttl;
		if (rt->rt_type != RTN_MULTICAST)
			iph->ttl=sk->ip_ttl;
		iph->protocol=sk->protocol;
		iph->saddr=rt->rt_src;
		iph->daddr=rt->rt_dst;
		iph->check=0;
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
		err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
	}
	else
		err = getfrag(frag, (void *)iph, 0, length);

	dev_unlock_list();

	if (err)
		goto error_fault;

#ifdef CONFIG_FIREWALL
	switch (call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb)) {
	case FW_QUEUE:
		kfree_skb(skb);
		return 0;
	case FW_BLOCK:
	case FW_REJECT:
		kfree_skb(skb);
		err = -EPERM;
		goto error;
	}
#endif

	return rt->u.dst.output(skb);

error_fault:
	err = -EFAULT;
	kfree_skb(skb);
error:
	ip_statistics.IpOutDiscards++;
	return err;
}
/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending.
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 */
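/*
 * Editor's note: worked example (not part of the original file).  For a
 * 4028-byte datagram (20-byte header plus 4008 bytes of data) and a path MTU
 * of 1500, hlen = 20 and mtu = 1480 below, so the loop produces fragments
 * carrying 1480, 1480 and 1048 data bytes with frag_off 0, 185 and 370
 * (in 8-byte units); MF is set on all but the last.
 */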
void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len;
	int offset;
	int not_last_frag;
	struct rtable *rt = (struct rtable*)skb->dst;

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */

	raw = skb->nh.raw;
	iph = (struct iphdr*)raw;

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	mtu = rt->u.dst.pmtu - hlen;		/* Size of data space */
	ptr = raw + hlen;			/* Where to start from */

	/*
	 *	The protocol doesn't seem to say what to do in the case that the
	 *	frame + options doesn't fit the mtu. As it used to fall down dead
	 *	in this case, we were fortunate it didn't happen.
	 *
	 *	It is impossible, because mtu>=68. --ANK (980801)
	 */

#ifdef CONFIG_NET_PARANOIA
	if (mtu<8)
		goto fail;
#endif

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->pkt_type = skb->pkt_type;
		skb2->priority = skb->priority;
		skb_reserve(skb2, (dev->hard_header_len+15)&~15);
		skb_put(skb2, len + hlen);
		skb2->nh.raw = skb2->data;
		skb2->h.raw = skb2->data + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		skb2->dst = dst_clone(skb->dst);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->nh.raw, raw, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw, ptr, len);
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = skb2->nh.iph;
		iph->frag_off = htons((offset >> 3));

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *		   last fragment then keep MF set on each fragment produced.
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		output(skb2);
	}
	kfree_skb(skb);
	ip_statistics.IpFragOKs++;
	return;

fail:
	kfree_skb(skb);
	ip_statistics.IpFragFails++;
}
/*
 *	Fetch data from kernel space and fill in checksum if needed.
 */
static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
			      unsigned int fraglen)
{
	struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
	u16 *pktp = (u16 *)to;
	struct iovec *iov;
	int len;
	int hdrflag = 1;

	iov = &dp->iov[0];
	if (offset >= iov->iov_len) {
		offset -= iov->iov_len;
		iov++;
		hdrflag = 0;
	}
	len = iov->iov_len - offset;
	if (fraglen > len) { /* overlapping. */
		dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
						     dp->csum);
		offset = 0;
		fraglen -= len;
		to += len;
		iov++;
	}

	dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
					     dp->csum);

	if (hdrflag && dp->csumoffset)
		*(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
	return 0;
}
/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets so far. ICMP should use this function too.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure to pass arguments.
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct {
		struct ip_options opt;
		char data[40];
	} replyopts;
	struct ipcm_cookie ipc;
	u32 daddr;
	struct rtable *rt = (struct rtable*)skb->dst;

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	sk->ip_tos = skb->nh.iph->tos;
	sk->priority = skb->priority;
	sk->protocol = skb->nh.iph->protocol;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = &replyopts.opt;

	if (ipc.opt->srr)
		daddr = replyopts.opt.faddr;
	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
		return;

	/* And let IP do all the hard work. */
	ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
	ip_rt_put(rt);
}
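/*
 * Editor's note: an illustrative fragment (not part of the original file)
 * showing how a caller might fill in struct ip_reply_arg before calling
 * ip_send_reply(), e.g. with a pre-built TCP header 'rep' ('sk', 'skb' and
 * 'rep' are assumed from the caller's context).  Only the fields actually
 * read above are shown: iov[0], csum and csumoffset; csumoffset is counted
 * in 16-bit words, since ip_reply_glue_bits() indexes a u16 pointer with it.
 */
#if 0
	struct ip_reply_arg arg;

	arg.iov[0].iov_base = &rep;
	arg.iov[0].iov_len = sizeof(rep);
	/* Partial checksum over the reply; ip_reply_glue_bits() folds it
	 * into the checksum field at csumoffset. */
	arg.csum = csum_partial((char *)&rep, sizeof(rep), 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(sk, skb, &arg, sizeof(rep));
#endif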
/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type =
{
	__constant_htons(ETH_P_IP),
	NULL,	/* All devices */
	ip_rcv,
	NULL,
	NULL,
};

#ifdef CONFIG_PROC_FS
#ifdef CONFIG_IP_MULTICAST
static struct proc_dir_entry proc_net_igmp = {
	PROC_NET_IGMP, 4, "igmp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	ip_mc_procinfo
};
#endif
#endif
/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

__initfunc(void ip_init(void))
{
	dev_add_pack(&ip_packet_type);

	ip_rt_init();

#ifdef CONFIG_PROC_FS
#ifdef CONFIG_IP_MULTICAST
	proc_net_register(&proc_net_igmp);
#endif
#endif
}