/* net/ipv6/ip6_output.c */
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

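/*
 * nf_hook() returns 1 when the LOCAL_OUT hook lets the packet pass
 * (rather than stealing, queueing or dropping it); only in that case
 * do we continue with dst_output().
 */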
int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!newskb->dst);

	netif_rx(newskb);
	return 0;
}

static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

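/*
 * With IPV6_PMTUDISC_PROBE the socket explicitly asks to ignore the
 * discovered path MTU, so the raw device MTU is used instead of the
 * (possibly smaller) value cached in the dst entry.
 */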
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb->dst);

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */
	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;
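	/*
	 * The first 32-bit word of the IPv6 header packs version (4 bits),
	 * traffic class (8 bits) and flow label (20 bits). fl6_flowlabel is
	 * already in network byte order, so it is OR'd in outside htonl().
	 */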
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

/*
 * To avoid extra problems ND packets are sent through this
 * routine. It's code duplication but I really want to avoid
 * extra checks since ipv6_build_header is used by TCP (which
 * is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

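/*
 * Deliver a Router Alert packet to every matching RA listener: each
 * match but the last receives a clone, and the final listener consumes
 * the original skb. Returns 1 if the packet was consumed by at least
 * one listener, 0 if the caller still owns it.
 */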
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any warranty that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

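/*
 * Walk the extension header chain to find where the unfragmentable part
 * ends, i.e. the offset at which a Fragment header may be inserted.
 * Hop-by-hop and routing headers (and destination options preceding a
 * routing header) must be repeated in every fragment. On return,
 * *nexthdr points at the Next Header field that the caller rewrites to
 * NEXTHDR_FRAGMENT.
 */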
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
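	/*
	 * 'mtu' now holds the payload budget of each fragment after the
	 * unfragmentable part and the Fragment header itself. For example,
	 * a 1500-byte link MTU with a bare 40-byte IPv6 header (hlen == 40)
	 * leaves 1500 - 40 - 8 = 1452 bytes per fragment.
	 */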
	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
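		/*
		 * Keeping 'len' a multiple of 8 matters because the fragment
		 * offset field counts 8-octet units: a byte offset with its
		 * low three bits clear can be stored via htons(offset) below,
		 * leaving those bits free for the flags (IP6_MF).
		 */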
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

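/*
 * Both helpers below size the copy as (hdrlen + 1) * 8: an extension
 * header's Hdr Ext Len field counts 8-octet units beyond the first
 * 8 octets (RFC 2460), so the full header occupies (hdrlen + 1) * 8
 * bytes.
 */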
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
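	/*
	 * maxfraglen is the largest packet length that keeps the fragmentable
	 * part a multiple of 8 while leaving room for the Fragment header.
	 * For example, mtu = 1500 with fragheaderlen = 40 (a bare IPv6
	 * header) gives ((1500 - 40) & ~7) + 40 - 8 = 1488.
	 */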
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}
		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}
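
/*
 * A minimal sketch of the corked-transmit pattern these three functions
 * form (an assumed caller along the lines of the UDPv6 sendmsg path,
 * not part of this file):
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, sizeof(struct udphdr),
 *			      hlimit, tclass, opt, fl, rt, flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * ip6_append_data() may be called repeatedly while the socket is corked;
 * ip6_push_pending_frames() builds the final IPv6 header and sends the
 * queued data, and ip6_flush_pending_frames() discards it on error.
 */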