/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov           :       arithmetic in fragmentation.
 *                                      extension headers are implemented.
 *                                      route changes now work.
 *                                      ip6_forward does not confuse sniffers.
 *                                      etc.
 *
 *      H. von Brand            :       Added missing #include <linux/string.h>
 *      Imran Patel             :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                              :       add ip6_append_data and related functions
 *                                      for datagram xmit
 */
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
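/*
 * Pick the fragment identification value for an outgoing packet.  A single
 * global counter, protected by a spinlock, is shared by all flows; zero is
 * skipped so that an ID of 0 can mean "not yet assigned".
 */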
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
        static u32 ipv6_fragmentation_id = 1;
        static DEFINE_SPINLOCK(ip6_id_lock);

        spin_lock_bh(&ip6_id_lock);
        fhdr->identification = htonl(ipv6_fragmentation_id);
        if (++ipv6_fragmentation_id == 0)
                ipv6_fragmentation_id = 1;
        spin_unlock_bh(&ip6_id_lock);
}
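
/*
 * Final output step: fill in the link-layer header, either from the cached
 * hardware header (dst->hh) or via the neighbour's output function.
 * Packets with no usable neighbour entry are dropped and counted as
 * OUTNOROUTES.
 */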
static inline int ip6_output_finish(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct hh_cache *hh = dst->hh;

        if (hh) {
                int hh_alen;

                read_lock_bh(&hh->hh_lock);
                hh_alen = HH_DATA_ALIGN(hh->hh_len);
                memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
                read_unlock_bh(&hh->hh_lock);
                skb_push(skb, hh->hh_len);
                return hh->hh_output(skb);
        } else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        newskb->mac.raw = newskb->data;
        __skb_pull(newskb, newskb->nh.raw - newskb->data);
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        BUG_TRAP(newskb->dst);

        netif_rx(newskb);
        return 0;
}
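
/*
 * Second-stage output: for multicast destinations the sending host has
 * itself joined, loop a clone of the packet back through netif_rx()
 * (unless IPV6_MULTICAST_LOOP is disabled), then pass the packet to the
 * POST_ROUTING netfilter hook on its way to ip6_output_finish().
 */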
static int ip6_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
                struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;

                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
                    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
                                        &skb->nh.ipv6h->saddr)) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
                                        newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (skb->nh.ipv6h->hop_limit == 0) {
                                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
        }

        return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
                       ip6_output_finish);
}
int ip6_output(struct sk_buff *skb)
{
        if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
                return ip6_fragment(skb, ip6_output2);
        else
                return ip6_output2(skb);
}
/*
 *      xmit an sk_buff (used by TCP)
 */
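/*
 * Note: the caller is expected to have set up skb->dst and left headroom
 * for the IPv6 header.  ip6_xmit pushes the extension headers from @opt
 * and the IPv6 header itself, then hands the packet to the LOCAL_OUT
 * netfilter hook.  Unless @ipfragok is set, oversized packets are not
 * fragmented here; an ICMPV6_PKT_TOOBIG is sent back to the socket itself.
 */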
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)
{
        struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr;
        u8 proto = fl->proto;
        int seg_len = skb->len;
        int hlimit;
        u32 mtu;

        if (opt) {
                int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        kfree_skb(skb);
                        skb = skb2;
                        if (skb == NULL) {
                                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                                return -ENOBUFS;
                        }
                        if (sk)
                                skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));

        /*
         *      Fill in the IPv6 header
         */

        *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
        hlimit = -1;
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
        if (hlimit < 0)
                hlimit = ipv6_get_hoplimit(dst->dev);

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || ipfragok) {
                IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
                return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
                               dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is for us performance critical)
 */
int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               struct in6_addr *saddr, struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
        skb->nh.ipv6h = hdr;

        *(u32*)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}
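
/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for the matching ip6_ra_chain value.  The last interested
 * socket consumes the original skb; earlier ones receive clones.
 * Returns 1 if the packet was taken, 0 if forwarding should continue.
 */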
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}
static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}
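
/*
 * Forward a packet not addressed to this host: enforce the forwarding
 * sysctl and XFRM policy, decrement the hop limit (sending
 * ICMPV6_TIME_EXCEED when it runs out), emit redirects where neighbour
 * discovery allows them, check the outgoing MTU, and finally queue the
 * packet through the NF_IP6_FORWARD netfilter hook.
 */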
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr = skb->nh.ipv6h;
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (ipv6_devconf.forwarding == 0)
                goto error;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb->ip_summed = CHECKSUM_NONE;

        /*
         *      We DO NOT make any processing on
         *      RA packets, pushing them to user level AS IS
         *      without any warranty that the application will be
         *      able to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not an end node, so if a packet contains
         *      AH/ESP, we cannot do anything.
         *      Defragmentation also would be a mistake; RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb->nh.raw + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                            0, skb->dev);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb->dst;

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr*)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
                                                |IPV6_ADDR_LINKLOCAL)) {
                /* This check is security critical. */
                goto error;
        }

        if (skb->len > dst_mtu(dst)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
                IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = skb->nh.ipv6h;

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}
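
/*
 * Copy per-packet metadata (packet type, priority, dst, device,
 * traffic-control and netfilter state) from the original skb to a
 * freshly built fragment.
 */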
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        dst_release(to->dst);
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
        to->nfmark = from->nfmark;
        /* Connection association is same as pre-frag packet */
        to->nfct = from->nfct;
        nf_conntrack_get(to->nfct);
        to->nfctinfo = from->nfctinfo;
#ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(to->nf_bridge);
        to->nf_bridge = from->nf_bridge;
        nf_bridge_get(to->nf_bridge);
#endif
#endif
}
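
/*
 * Locate the point where a Fragment header has to be inserted: skip over
 * the unfragmentable part of the extension-header chain (Hop-by-Hop,
 * Routing, and any Destination Options headers seen before a Routing
 * header).  Returns the byte offset of the insertion point and leaves
 * *nexthdr pointing at the "next header" field to be patched.
 */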
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
        unsigned int packet_len = skb->tail - skb->nh.raw;
        int found_rhdr = 0;
        *nexthdr = &skb->nh.ipv6h->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                case NEXTHDR_ROUTING:
                case NEXTHDR_DEST:
                        if (**nexthdr == NEXTHDR_ROUTING)
                                found_rhdr = 1;
                        if (**nexthdr == NEXTHDR_DEST && found_rhdr)
                                return offset;
                        offset += ipv6_optlen(exthdr);
                        *nexthdr = &exthdr->nexthdr;
                        exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
                        break;
                default:
                        return offset;
                }
        }

        return offset;
}
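
/*
 * Fragment an oversized packet.  If the packet already carries its data
 * as a chain of properly sized buffers in frag_list, each buffer becomes
 * one fragment ("fast path"); otherwise the slow path allocates a fresh
 * skb per fragment and copies the payload out in mtu-sized,
 * 8-byte-aligned chunks.  @output is called for every fragment produced.
 */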
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct net_device *dev;
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info*)skb->dst;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        u32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;

        dev = rt->u.dst.dev;
        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);

        if (skb_shinfo(skb)->frag_list) {
                int first_len = skb_pagelen(skb);

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                sock_hold(skb->sk);
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                                skb->truesize -= frag->truesize;
                        }
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_shinfo(skb)->frag_list = NULL;
                /* BUILD HEADER */

                tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                *prevhdr = NEXTHDR_FRAGMENT;
                memcpy(tmp_hdr, skb->nh.raw, hlen);
                __skb_pull(skb, hlen);
                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
                skb->nh.raw = __skb_push(skb, hlen);
                memcpy(skb->nh.raw, tmp_hdr, hlen);

                ipv6_select_ident(skb, fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                frag->h.raw = frag->data;
                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
                                frag->nh.raw = __skb_push(frag, hlen);
                                memcpy(frag->nh.raw, tmp_hdr, hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                return err;
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                frag->nh.raw = frag->data;
                fh = (struct frag_hdr*)(frag->data + hlen);
                frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                memcpy(frag->nh.raw, skb->data, hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(skb, fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, frag->h.raw, len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */

                IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);

                err = output(frag);
                if (err)
                        goto fail;
        }
        kfree_skb(skb);
        IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
        return err;

fail:
        kfree_skb(skb);
        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
        return err;
}
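
/*
 * Resolve the route for a flow: reuse the socket's cached dst when it is
 * still valid for this destination and outgoing interface, otherwise ask
 * the routing table, and fill in a source address if the caller left it
 * unspecified.
 */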
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        int err = 0;

        *dst = NULL;
        if (sk) {
                struct ipv6_pinfo *np = inet6_sk(sk);

                *dst = sk_dst_check(sk, np->dst_cookie);
                if (*dst) {
                        struct rt6_info *rt = (struct rt6_info*)*dst;

                        /* Yes, checking route validity in the not connected
                           case is not very simple. Take into account that
                           we do not support routing by source, TOS,
                           and MSG_DONTROUTE            --ANK (980726)

                           1. If route was host route, check that
                              cached destination is current.
                              If it is network route, we still may
                              check its validity using saved pointer
                              to the last used address: daddr_cache.
                              We do not want to save whole address now,
                              (because main consumer of this service
                              is tcp, which does not have this problem),
                              so that the last trick works only on connected
                              sockets.
                           2. oif also should be the same.
                         */

                        if (((rt->rt6i_dst.plen != 128 ||
                              !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
                             && (np->daddr_cache == NULL ||
                                 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
                            || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
                                dst_release(*dst);
                                *dst = NULL;
                        }
                }
        }

        if (*dst == NULL)
                *dst = ip6_route_output(sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);

                if (err)
                        goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;
        return err;
}
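
/*
 * Queue data on the socket's write queue for later transmission by
 * ip6_push_pending_frames().  On the first call the cork state (options,
 * route, flow, MTU) is set up; data is then packed into MTU-sized skbs,
 * using the device's scatter-gather pages when available.  @getfrag
 * copies the caller's data and may checksum it as it goes.
 */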
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                                        int offset, int len, int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (np->cork.opt == NULL) {
                                np->cork.opt = kmalloc(opt->tot_len,
                                                       sk->sk_allocation);
                                if (unlikely(np->cork.opt == NULL))
                                        return -ENOBUFS;
                        } else if (np->cork.opt->tot_len < opt->tot_len) {
                                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
                                return -EINVAL;
                        }
                        memcpy(np->cork.opt, opt, opt->tot_len);
                        inet->cork.flags |= IPCORK_OPT;
                        /* need source address above miyazawa */
                }
                dst_hold(&rt->u.dst);
                np->cork.rt = rt;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = np->cork.rt;
                fl = &inet->cork.fl;
                if (inet->cork.flags & IPCORK_OPT)
                        opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb->nh.raw = data + exthdrlen;
                        data += fragheaderlen;
                        skb->h.raw = data + exthdrlen;

                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                skb_trim(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                                skb->truesize += PAGE_SIZE;
                                atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                }
                offset += copy;
                length -= copy;
        }
        return 0;

error:
        inet->cork.length -= length;
        IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
        return err;
}
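
/*
 * Transmit everything queued by ip6_append_data(): splice the queued
 * skbs into one packet via frag_list, push the extension headers and the
 * IPv6 header, and send the result through the LOCAL_OUT netfilter hook.
 * The cork state is torn down whether or not transmission succeeds.
 */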
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = np->cork.rt;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb->nh.raw)
                __skb_pull(skb, skb->nh.raw - skb->data);
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb->h.raw - skb->nh.raw);
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));

        *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);

        if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
                hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
        else
                hdr->payload_len = 0;
        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->dst = dst_clone(&rt->u.dst);
        IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
        err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
                      dst_output);
        if (err) {
                if (err > 0)
                        err = np->recverr ? net_xmit_errno(err) : 0;
                if (err)
                        goto error;
        }

out:
        inet->cork.flags &= ~IPCORK_OPT;
        if (np->cork.opt) {
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }
        if (np->cork.rt) {
                dst_release(&np->cork.rt->u.dst);
                np->cork.rt = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
        return err;
error:
        goto out;
}
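
/*
 * Abort a corked send: discard everything queued by ip6_append_data()
 * and release the cork state.
 */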
void ip6_flush_pending_frames(struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        inet->cork.flags &= ~IPCORK_OPT;

        if (np->cork.opt) {
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }
        if (np->cork.rt) {
                dst_release(&np->cork.rt->u.dst);
                np->cork.rt = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}