Revert "x86: disable IOMMUs on kernel crash"
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / ip6_output.c
blobeb6d09728633a02c9a8476f48d69847edb45bb0c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
44 #include <net/sock.h>
45 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
/* Forward declaration: ip6_fragment() is defined further down in this file but is called from ip6_output(). */
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 int __ip6_local_out(struct sk_buff *skb)
62 int len;
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
70 dst_output);
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other value from
 * __ip6_local_out() is returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85 static int ip6_output_finish(struct sk_buff *skb)
87 struct dst_entry *dst = skb_dst(skb);
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
92 return dst->neighbour->output(skb);
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
96 kfree_skb(skb);
97 return -EINVAL;
101 /* dev_loopback_xmit for use with netfilter. */
102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
104 skb_reset_mac_header(newskb);
105 __skb_pull(newskb, skb_network_offset(newskb));
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
108 WARN_ON(!skb_dst(newskb));
110 netif_rx(newskb);
111 return 0;
115 static int ip6_output2(struct sk_buff *skb)
117 struct dst_entry *dst = skb_dst(skb);
118 struct net_device *dev = dst->dev;
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
124 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
126 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
127 ((mroute6_socket(dev_net(dev)) &&
128 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
129 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
130 &ipv6_hdr(skb)->saddr))) {
131 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
133 /* Do not check for IFF_ALLMULTI; multicast routing
134 is not supported in any case.
136 if (newskb)
137 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
138 NULL, newskb->dev,
139 ip6_dev_loopback_xmit);
141 if (ipv6_hdr(skb)->hop_limit == 0) {
142 IP6_INC_STATS(dev_net(dev), idev,
143 IPSTATS_MIB_OUTDISCARDS);
144 kfree_skb(skb);
145 return 0;
149 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
150 skb->len);
153 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
154 ip6_output_finish);
157 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
162 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
165 int ip6_output(struct sk_buff *skb)
167 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
168 if (unlikely(idev->cnf.disable_ipv6)) {
169 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
170 IPSTATS_MIB_OUTDISCARDS);
171 kfree_skb(skb);
172 return 0;
175 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
176 dst_allfrag(skb_dst(skb)))
177 return ip6_fragment(skb, ip6_output2);
178 else
179 return ip6_output2(skb);
183 * xmit an sk_buff (used by TCP)
186 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
187 struct ipv6_txoptions *opt, int ipfragok)
189 struct net *net = sock_net(sk);
190 struct ipv6_pinfo *np = inet6_sk(sk);
191 struct in6_addr *first_hop = &fl->fl6_dst;
192 struct dst_entry *dst = skb_dst(skb);
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
196 int hlimit = -1;
197 int tclass = 0;
198 u32 mtu;
200 if (opt) {
201 unsigned int head_room;
203 /* First: exthdrs may take lots of space (~8K for now)
204 MAX_HEADER is not enough.
206 head_room = opt->opt_nflen + opt->opt_flen;
207 seg_len += head_room;
208 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210 if (skb_headroom(skb) < head_room) {
211 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
212 if (skb2 == NULL) {
213 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
214 IPSTATS_MIB_OUTDISCARDS);
215 kfree_skb(skb);
216 return -ENOBUFS;
218 kfree_skb(skb);
219 skb = skb2;
220 if (sk)
221 skb_set_owner_w(skb, sk);
223 if (opt->opt_flen)
224 ipv6_push_frag_opts(skb, opt, &proto);
225 if (opt->opt_nflen)
226 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
229 skb_push(skb, sizeof(struct ipv6hdr));
230 skb_reset_network_header(skb);
231 hdr = ipv6_hdr(skb);
233 /* Allow local fragmentation. */
234 if (ipfragok)
235 skb->local_df = 1;
238 * Fill in the IPv6 header
240 if (np) {
241 tclass = np->tclass;
242 hlimit = np->hop_limit;
244 if (hlimit < 0)
245 hlimit = ip6_dst_hoplimit(dst);
247 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
249 hdr->payload_len = htons(seg_len);
250 hdr->nexthdr = proto;
251 hdr->hop_limit = hlimit;
253 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
254 ipv6_addr_copy(&hdr->daddr, first_hop);
256 skb->priority = sk->sk_priority;
257 skb->mark = sk->sk_mark;
259 mtu = dst_mtu(dst);
260 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
261 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
262 IPSTATS_MIB_OUT, skb->len);
263 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
264 dst_output);
267 if (net_ratelimit())
268 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
269 skb->dev = dst->dev;
270 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
271 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
272 kfree_skb(skb);
273 return -EMSGSIZE;
276 EXPORT_SYMBOL(ip6_xmit);
279 * To avoid extra problems ND packets are send through this
280 * routine. It's code duplication but I really want to avoid
281 * extra checks since ipv6_build_header is used by TCP (which
282 * is for us performance critical)
285 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
286 const struct in6_addr *saddr, const struct in6_addr *daddr,
287 int proto, int len)
289 struct ipv6_pinfo *np = inet6_sk(sk);
290 struct ipv6hdr *hdr;
291 int totlen;
293 skb->protocol = htons(ETH_P_IPV6);
294 skb->dev = dev;
296 totlen = len + sizeof(struct ipv6hdr);
298 skb_reset_network_header(skb);
299 skb_put(skb, sizeof(struct ipv6hdr));
300 hdr = ipv6_hdr(skb);
302 *(__be32*)hdr = htonl(0x60000000);
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
311 return 0;
314 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
322 if (sk && ra->sel == sel &&
323 (!sk->sk_bound_dev_if ||
324 sk->sk_bound_dev_if == skb->dev->ifindex)) {
325 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2)
328 rawv6_rcv(last, skb2);
330 last = sk;
334 if (last) {
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
337 return 1;
339 read_unlock(&ip6_ra_lock);
340 return 0;
343 static int ip6_forward_proxy_check(struct sk_buff *skb)
345 struct ipv6hdr *hdr = ipv6_hdr(skb);
346 u8 nexthdr = hdr->nexthdr;
347 int offset;
349 if (ipv6_ext_hdr(nexthdr)) {
350 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
351 if (offset < 0)
352 return 0;
353 } else
354 offset = sizeof(struct ipv6hdr);
356 if (nexthdr == IPPROTO_ICMPV6) {
357 struct icmp6hdr *icmp6;
359 if (!pskb_may_pull(skb, (skb_network_header(skb) +
360 offset + 1 - skb->data)))
361 return 0;
363 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
365 switch (icmp6->icmp6_type) {
366 case NDISC_ROUTER_SOLICITATION:
367 case NDISC_ROUTER_ADVERTISEMENT:
368 case NDISC_NEIGHBOUR_SOLICITATION:
369 case NDISC_NEIGHBOUR_ADVERTISEMENT:
370 case NDISC_REDIRECT:
371 /* For reaction involving unicast neighbor discovery
372 * message destined to the proxied address, pass it to
373 * input function.
375 return 1;
376 default:
377 break;
382 * The proxying router can't forward traffic sent to a link-local
383 * address, so signal the sender and discard the packet. This
384 * behavior is clarified by the MIPv6 specification.
386 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
387 dst_link_failure(skb);
388 return -1;
391 return 0;
/*
 * Continuation passed to the NF_INET_FORWARD hook by ip6_forward():
 * hands the packet to dst_output().
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
399 int ip6_forward(struct sk_buff *skb)
401 struct dst_entry *dst = skb_dst(skb);
402 struct ipv6hdr *hdr = ipv6_hdr(skb);
403 struct inet6_skb_parm *opt = IP6CB(skb);
404 struct net *net = dev_net(dst->dev);
406 if (net->ipv6.devconf_all->forwarding == 0)
407 goto error;
409 if (skb_warn_if_lro(skb))
410 goto drop;
412 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
413 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
414 goto drop;
417 skb_forward_csum(skb);
420 * We DO NOT make any processing on
421 * RA packets, pushing them to user level AS IS
422 * without ane WARRANTY that application will be able
423 * to interpret them. The reason is that we
424 * cannot make anything clever here.
426 * We are not end-node, so that if packet contains
427 * AH/ESP, we cannot make anything.
428 * Defragmentation also would be mistake, RA packets
429 * cannot be fragmented, because there is no warranty
430 * that different fragments will go along one path. --ANK
432 if (opt->ra) {
433 u8 *ptr = skb_network_header(skb) + opt->ra;
434 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
435 return 0;
439 * check and decrement ttl
441 if (hdr->hop_limit <= 1) {
442 /* Force OUTPUT device used as source address */
443 skb->dev = dst->dev;
444 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
445 0, skb->dev);
446 IP6_INC_STATS_BH(net,
447 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
449 kfree_skb(skb);
450 return -ETIMEDOUT;
453 /* XXX: idev->cnf.proxy_ndp? */
454 if (net->ipv6.devconf_all->proxy_ndp &&
455 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
456 int proxied = ip6_forward_proxy_check(skb);
457 if (proxied > 0)
458 return ip6_input(skb);
459 else if (proxied < 0) {
460 IP6_INC_STATS(net, ip6_dst_idev(dst),
461 IPSTATS_MIB_INDISCARDS);
462 goto drop;
466 if (!xfrm6_route_forward(skb)) {
467 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
468 goto drop;
470 dst = skb_dst(skb);
472 /* IPv6 specs say nothing about it, but it is clear that we cannot
473 send redirects to source routed frames.
474 We don't send redirects to frames decapsulated from IPsec.
476 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
477 !skb_sec_path(skb)) {
478 struct in6_addr *target = NULL;
479 struct rt6_info *rt;
480 struct neighbour *n = dst->neighbour;
483 * incoming and outgoing devices are the same
484 * send a redirect.
487 rt = (struct rt6_info *) dst;
488 if ((rt->rt6i_flags & RTF_GATEWAY))
489 target = (struct in6_addr*)&n->primary_key;
490 else
491 target = &hdr->daddr;
493 /* Limit redirects both by destination (here)
494 and by source (inside ndisc_send_redirect)
496 if (xrlim_allow(dst, 1*HZ))
497 ndisc_send_redirect(skb, n, target);
498 } else {
499 int addrtype = ipv6_addr_type(&hdr->saddr);
501 /* This check is security critical. */
502 if (addrtype == IPV6_ADDR_ANY ||
503 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
504 goto error;
505 if (addrtype & IPV6_ADDR_LINKLOCAL) {
506 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
507 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
508 goto error;
512 if (skb->len > dst_mtu(dst)) {
513 /* Again, force OUTPUT device used as source address */
514 skb->dev = dst->dev;
515 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
516 IP6_INC_STATS_BH(net,
517 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
518 IP6_INC_STATS_BH(net,
519 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
520 kfree_skb(skb);
521 return -EMSGSIZE;
524 if (skb_cow(skb, dst->dev->hard_header_len)) {
525 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
526 goto drop;
529 hdr = ipv6_hdr(skb);
531 /* Mangling hops number delayed to point after skb COW */
533 hdr->hop_limit--;
535 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
536 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
537 ip6_forward_finish);
539 error:
540 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
541 drop:
542 kfree_skb(skb);
543 return -EINVAL;
546 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
548 to->pkt_type = from->pkt_type;
549 to->priority = from->priority;
550 to->protocol = from->protocol;
551 skb_dst_drop(to);
552 skb_dst_set(to, dst_clone(skb_dst(from)));
553 to->dev = from->dev;
554 to->mark = from->mark;
556 #ifdef CONFIG_NET_SCHED
557 to->tc_index = from->tc_index;
558 #endif
559 nf_copy(to, from);
560 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
561 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
562 to->nf_trace = from->nf_trace;
563 #endif
564 skb_copy_secmark(to, from);
567 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
569 u16 offset = sizeof(struct ipv6hdr);
570 struct ipv6_opt_hdr *exthdr =
571 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
572 unsigned int packet_len = skb->tail - skb->network_header;
573 int found_rhdr = 0;
574 *nexthdr = &ipv6_hdr(skb)->nexthdr;
576 while (offset + 1 <= packet_len) {
578 switch (**nexthdr) {
580 case NEXTHDR_HOP:
581 break;
582 case NEXTHDR_ROUTING:
583 found_rhdr = 1;
584 break;
585 case NEXTHDR_DEST:
586 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
587 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
588 break;
589 #endif
590 if (found_rhdr)
591 return offset;
592 break;
593 default :
594 return offset;
597 offset += ipv6_optlen(exthdr);
598 *nexthdr = &exthdr->nexthdr;
599 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
600 offset);
603 return offset;
/*
 * ip6_fragment - split @skb into IPv6 fragments and pass each one to @output.
 *
 * The unfragmentable part of the header (its length comes from
 * ip6_find_1stfragopt()) is replicated in front of every fragment, followed
 * by a fragment header.  The per-fragment payload limit is the MTU from
 * ip6_skb_dst_mtu(), lowered to the socket's frag_size when that is set and
 * smaller, minus the replicated headers.
 *
 * If skb->local_df is not set, no fragmentation is attempted: an ICMPv6
 * "packet too big" is sent, the skb is freed and -EMSGSIZE is returned.
 *
 * Two strategies are used:
 *  - fast path: when @skb already carries a frag list whose geometry fits
 *    (every piece within the limit, all but the last a multiple of 8 bytes,
 *    enough headroom, nothing cloned or shared), headers are pushed in
 *    front of the existing buffers and each is sent as it is;
 *  - slow path (label slow_path): a new skb is allocated for every fragment
 *    and the data is copied into it with skb_copy_bits(); the original skb
 *    is freed at the end.
 *
 * Returns 0 when every fragment was handed to @output successfully,
 * otherwise the first error (from @output, or -ENOMEM on allocation
 * failure).  FRAGCREATES/FRAGOKS/FRAGFAILS counters are updated on the way.
 *
 * NOTE(review): in the fast path, a failed kmemdup() returns -ENOMEM after
 * the frag list has been detached, with no kfree_skb() visible for @skb or
 * the detached fragments - looks like a leak; confirm.
 * NOTE(review): the frag walk assigns frag->sk and frag->destructor before a
 * later fragment can still "goto slow_path"; confirm the socket memory
 * accounting stays balanced in that case.
 * NOTE(review): this listing comes from a web view; the leading numbers are
 * the original file line numbers and brace-only lines are missing.
 */
606 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
608 struct sk_buff *frag;
609 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
610 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
611 struct ipv6hdr *tmp_hdr;
612 struct frag_hdr *fh;
613 unsigned int mtu, hlen, left, len;
614 __be32 frag_id = 0;
615 int ptr, offset = 0, err=0;
616 u8 *prevhdr, nexthdr = 0;
617 struct net *net = dev_net(skb_dst(skb)->dev);
619 hlen = ip6_find_1stfragopt(skb, &prevhdr);
620 nexthdr = *prevhdr;
622 mtu = ip6_skb_dst_mtu(skb);
624 /* We must not fragment if the socket is set to force MTU discovery
625 * or if the skb it not generated by a local socket. (This last
626 * check should be redundant, but it's free.)
628 if (!skb->local_df) {
629 skb->dev = skb_dst(skb)->dev;
630 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
631 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
632 IPSTATS_MIB_FRAGFAILS);
633 kfree_skb(skb);
634 return -EMSGSIZE;
637 if (np && np->frag_size < mtu) {
638 if (np->frag_size)
639 mtu = np->frag_size;
641 mtu -= hlen + sizeof(struct frag_hdr);
643 if (skb_has_frags(skb)) {
644 int first_len = skb_pagelen(skb);
645 int truesizes = 0;
647 if (first_len - hlen > mtu ||
648 ((first_len - hlen) & 7) ||
649 skb_cloned(skb))
650 goto slow_path;
652 skb_walk_frags(skb, frag) {
653 /* Correct geometry. */
654 if (frag->len > mtu ||
655 ((frag->len & 7) && frag->next) ||
656 skb_headroom(frag) < hlen)
657 goto slow_path;
659 /* Partially cloned skb? */
660 if (skb_shared(frag))
661 goto slow_path;
663 BUG_ON(frag->sk);
664 if (skb->sk) {
665 frag->sk = skb->sk;
666 frag->destructor = sock_wfree;
667 truesizes += frag->truesize;
671 err = 0;
672 offset = 0;
673 frag = skb_shinfo(skb)->frag_list;
674 skb_frag_list_init(skb);
675 /* BUILD HEADER */
677 *prevhdr = NEXTHDR_FRAGMENT;
678 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
679 if (!tmp_hdr) {
680 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
681 IPSTATS_MIB_FRAGFAILS);
682 return -ENOMEM;
685 __skb_pull(skb, hlen);
686 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
687 __skb_push(skb, hlen);
688 skb_reset_network_header(skb);
689 memcpy(skb_network_header(skb), tmp_hdr, hlen);
691 ipv6_select_ident(fh);
692 fh->nexthdr = nexthdr;
693 fh->reserved = 0;
694 fh->frag_off = htons(IP6_MF);
695 frag_id = fh->identification;
697 first_len = skb_pagelen(skb);
698 skb->data_len = first_len - skb_headlen(skb);
699 skb->truesize -= truesizes;
700 skb->len = first_len;
701 ipv6_hdr(skb)->payload_len = htons(first_len -
702 sizeof(struct ipv6hdr));
704 dst_hold(&rt->u.dst);
706 for (;;) {
707 /* Prepare header of the next frame,
708 * before previous one went down. */
709 if (frag) {
710 frag->ip_summed = CHECKSUM_NONE;
711 skb_reset_transport_header(frag);
712 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
713 __skb_push(frag, hlen);
714 skb_reset_network_header(frag);
715 memcpy(skb_network_header(frag), tmp_hdr,
716 hlen);
717 offset += skb->len - hlen - sizeof(struct frag_hdr);
718 fh->nexthdr = nexthdr;
719 fh->reserved = 0;
720 fh->frag_off = htons(offset);
721 if (frag->next != NULL)
722 fh->frag_off |= htons(IP6_MF);
723 fh->identification = frag_id;
724 ipv6_hdr(frag)->payload_len =
725 htons(frag->len -
726 sizeof(struct ipv6hdr));
727 ip6_copy_metadata(frag, skb);
730 err = output(skb);
731 if(!err)
732 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
733 IPSTATS_MIB_FRAGCREATES);
735 if (err || !frag)
736 break;
738 skb = frag;
739 frag = skb->next;
740 skb->next = NULL;
743 kfree(tmp_hdr);
745 if (err == 0) {
746 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
747 IPSTATS_MIB_FRAGOKS);
748 dst_release(&rt->u.dst);
749 return 0;
752 while (frag) {
753 skb = frag->next;
754 kfree_skb(frag);
755 frag = skb;
758 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
759 IPSTATS_MIB_FRAGFAILS);
760 dst_release(&rt->u.dst);
761 return err;
764 slow_path:
765 left = skb->len - hlen; /* Space per frame */
766 ptr = hlen; /* Where to start from */
769 * Fragment the datagram.
772 *prevhdr = NEXTHDR_FRAGMENT;
775 * Keep copying data until we run out.
777 while(left > 0) {
778 len = left;
779 /* IF: it doesn't fit, use 'mtu' - the data space left */
780 if (len > mtu)
781 len = mtu;
782 /* IF: we are not sending upto and including the packet end
783 then align the next start on an eight byte boundary */
784 if (len < left) {
785 len &= ~7;
788 * Allocate buffer.
791 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
792 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
793 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
794 IPSTATS_MIB_FRAGFAILS);
795 err = -ENOMEM;
796 goto fail;
800 * Set up data on packet
803 ip6_copy_metadata(frag, skb);
804 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
805 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
806 skb_reset_network_header(frag);
807 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
808 frag->transport_header = (frag->network_header + hlen +
809 sizeof(struct frag_hdr));
812 * Charge the memory for the fragment to any owner
813 * it might possess
815 if (skb->sk)
816 skb_set_owner_w(frag, skb->sk);
819 * Copy the packet header into the new buffer.
821 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
824 * Build fragment header.
826 fh->nexthdr = nexthdr;
827 fh->reserved = 0;
828 if (!frag_id) {
829 ipv6_select_ident(fh);
830 frag_id = fh->identification;
831 } else
832 fh->identification = frag_id;
835 * Copy a block of the IP datagram.
837 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
838 BUG();
839 left -= len;
841 fh->frag_off = htons(offset);
842 if (left > 0)
843 fh->frag_off |= htons(IP6_MF);
844 ipv6_hdr(frag)->payload_len = htons(frag->len -
845 sizeof(struct ipv6hdr));
847 ptr += len;
848 offset += len;
851 * Put this fragment into the sending queue.
853 err = output(frag);
854 if (err)
855 goto fail;
857 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
858 IPSTATS_MIB_FRAGCREATES);
860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
861 IPSTATS_MIB_FRAGOKS);
862 kfree_skb(skb);
863 return err;
865 fail:
866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867 IPSTATS_MIB_FRAGFAILS);
868 kfree_skb(skb);
869 return err;
872 static inline int ip6_rt_check(struct rt6key *rt_key,
873 struct in6_addr *fl_addr,
874 struct in6_addr *addr_cache)
876 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
880 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst,
882 struct flowi *fl)
884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst;
887 if (!dst)
888 goto out;
890 /* Yes, checking route validity in not connected
891 * case is not very simple. Take into account,
892 * that we do not support routing by source, TOS,
893 * and MSG_DONTROUTE --ANK (980726)
895 * 1. ip6_rt_check(): If route was host route,
896 * check that cached destination is current.
897 * If it is network route, we still may
898 * check its validity using saved pointer
899 * to the last used address: daddr_cache.
900 * We do not want to save whole address now,
901 * (because main consumer of this service
902 * is tcp, which has not this problem),
903 * so that the last trick works only on connected
904 * sockets.
905 * 2. oif also should be the same.
907 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
908 #ifdef CONFIG_IPV6_SUBTREES
909 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
910 #endif
911 (fl->oif && fl->oif != dst->dev->ifindex)) {
912 dst_release(dst);
913 dst = NULL;
916 out:
917 return dst;
920 static int ip6_dst_lookup_tail(struct sock *sk,
921 struct dst_entry **dst, struct flowi *fl)
923 int err;
924 struct net *net = sock_net(sk);
926 if (*dst == NULL)
927 *dst = ip6_route_output(net, sk, fl);
929 if ((err = (*dst)->error))
930 goto out_err_release;
932 if (ipv6_addr_any(&fl->fl6_src)) {
933 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
934 &fl->fl6_dst,
935 sk ? inet6_sk(sk)->srcprefs : 0,
936 &fl->fl6_src);
937 if (err)
938 goto out_err_release;
941 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
943 * Here if the dst entry we've looked up
944 * has a neighbour entry that is in the INCOMPLETE
945 * state and the src address from the flow is
946 * marked as OPTIMISTIC, we release the found
947 * dst entry and replace it instead with the
948 * dst entry of the nexthop router
950 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
951 struct inet6_ifaddr *ifp;
952 struct flowi fl_gw;
953 int redirect;
955 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
956 (*dst)->dev, 1);
958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
959 if (ifp)
960 in6_ifa_put(ifp);
962 if (redirect) {
964 * We need to get the dst entry for the
965 * default router instead
967 dst_release(*dst);
968 memcpy(&fl_gw, fl, sizeof(struct flowi));
969 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
970 *dst = ip6_route_output(net, sk, &fl_gw);
971 if ((err = (*dst)->error))
972 goto out_err_release;
975 #endif
977 return 0;
979 out_err_release:
980 if (err == -ENETUNREACH)
981 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
982 dst_release(*dst);
983 *dst = NULL;
984 return err;
988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result
991 * @fl: flow to lookup
993 * This function performs a route lookup on the given flow.
995 * It returns zero on success, or a standard errno code on error.
997 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
999 *dst = NULL;
1000 return ip6_dst_lookup_tail(sk, dst, fl);
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1005 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1006 * @sk: socket which provides the dst cache and route info
1007 * @dst: pointer to dst_entry * for result
1008 * @fl: flow to lookup
1010 * This function performs a route lookup on the given flow with the
1011 * possibility of using the cached route in the socket if it is valid.
1012 * It will take the socket dst lock when operating on the dst cache.
1013 * As a result, this function can only be used in process context.
1015 * It returns zero on success, or a standard errno code on error.
1017 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1019 *dst = NULL;
1020 if (sk) {
1021 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1022 *dst = ip6_sk_dst_check(sk, *dst, fl);
1025 return ip6_dst_lookup_tail(sk, dst, fl);
1027 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1029 static inline int ip6_ufo_append_data(struct sock *sk,
1030 int getfrag(void *from, char *to, int offset, int len,
1031 int odd, struct sk_buff *skb),
1032 void *from, int length, int hh_len, int fragheaderlen,
1033 int transhdrlen, int mtu,unsigned int flags)
1036 struct sk_buff *skb;
1037 int err;
1039 /* There is support for UDP large send offload by network
1040 * device, so create one single skb packet containing complete
1041 * udp datagram
1043 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1044 skb = sock_alloc_send_skb(sk,
1045 hh_len + fragheaderlen + transhdrlen + 20,
1046 (flags & MSG_DONTWAIT), &err);
1047 if (skb == NULL)
1048 return -ENOMEM;
1050 /* reserve space for Hardware header */
1051 skb_reserve(skb, hh_len);
1053 /* create space for UDP/IP header */
1054 skb_put(skb,fragheaderlen + transhdrlen);
1056 /* initialize network header pointer */
1057 skb_reset_network_header(skb);
1059 /* initialize protocol header pointer */
1060 skb->transport_header = skb->network_header + fragheaderlen;
1062 skb->ip_summed = CHECKSUM_PARTIAL;
1063 skb->csum = 0;
1064 sk->sk_sndmsg_off = 0;
1067 err = skb_append_datato_frags(sk,skb, getfrag, from,
1068 (length - transhdrlen));
1069 if (!err) {
1070 struct frag_hdr fhdr;
1072 /* Specify the length of each IPv6 datagram fragment.
1073 * It has to be a multiple of 8.
1075 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1076 sizeof(struct frag_hdr)) & ~7;
1077 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1078 ipv6_select_ident(&fhdr);
1079 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1080 __skb_queue_tail(&sk->sk_write_queue, skb);
1082 return 0;
1084 /* There is not enough support do UPD LSO,
1085 * so follow normal path
1087 kfree_skb(skb);
1089 return err;
1092 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1093 gfp_t gfp)
1095 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1098 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1099 gfp_t gfp)
1101 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1104 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1105 int offset, int len, int odd, struct sk_buff *skb),
1106 void *from, int length, int transhdrlen,
1107 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1108 struct rt6_info *rt, unsigned int flags)
1110 struct inet_sock *inet = inet_sk(sk);
1111 struct ipv6_pinfo *np = inet6_sk(sk);
1112 struct sk_buff *skb;
1113 unsigned int maxfraglen, fragheaderlen;
1114 int exthdrlen;
1115 int hh_len;
1116 int mtu;
1117 int copy;
1118 int err;
1119 int offset = 0;
1120 int csummode = CHECKSUM_NONE;
1122 if (flags&MSG_PROBE)
1123 return 0;
1124 if (skb_queue_empty(&sk->sk_write_queue)) {
1126 * setup for corking
1128 if (opt) {
1129 if (WARN_ON(np->cork.opt))
1130 return -EINVAL;
1132 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1133 if (unlikely(np->cork.opt == NULL))
1134 return -ENOBUFS;
1136 np->cork.opt->tot_len = opt->tot_len;
1137 np->cork.opt->opt_flen = opt->opt_flen;
1138 np->cork.opt->opt_nflen = opt->opt_nflen;
1140 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1141 sk->sk_allocation);
1142 if (opt->dst0opt && !np->cork.opt->dst0opt)
1143 return -ENOBUFS;
1145 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1146 sk->sk_allocation);
1147 if (opt->dst1opt && !np->cork.opt->dst1opt)
1148 return -ENOBUFS;
1150 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1151 sk->sk_allocation);
1152 if (opt->hopopt && !np->cork.opt->hopopt)
1153 return -ENOBUFS;
1155 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1156 sk->sk_allocation);
1157 if (opt->srcrt && !np->cork.opt->srcrt)
1158 return -ENOBUFS;
1160 /* need source address above miyazawa*/
1162 dst_hold(&rt->u.dst);
1163 inet->cork.dst = &rt->u.dst;
1164 inet->cork.fl = *fl;
1165 np->cork.hop_limit = hlimit;
1166 np->cork.tclass = tclass;
1167 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1168 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1169 if (np->frag_size < mtu) {
1170 if (np->frag_size)
1171 mtu = np->frag_size;
1173 inet->cork.fragsize = mtu;
1174 if (dst_allfrag(rt->u.dst.path))
1175 inet->cork.flags |= IPCORK_ALLFRAG;
1176 inet->cork.length = 0;
1177 sk->sk_sndmsg_page = NULL;
1178 sk->sk_sndmsg_off = 0;
1179 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1180 rt->rt6i_nfheader_len;
1181 length += exthdrlen;
1182 transhdrlen += exthdrlen;
1183 } else {
1184 rt = (struct rt6_info *)inet->cork.dst;
1185 fl = &inet->cork.fl;
1186 opt = np->cork.opt;
1187 transhdrlen = 0;
1188 exthdrlen = 0;
1189 mtu = inet->cork.fragsize;
1192 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1194 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1195 (opt ? opt->opt_nflen : 0);
1196 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1198 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1199 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1200 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1201 return -EMSGSIZE;
1206 * Let's try using as much space as possible.
1207 * Use MTU if total length of the message fits into the MTU.
1208 * Otherwise, we need to reserve fragment header and
1209 * fragment alignment (= 8-15 octects, in total).
1211 * Note that we may need to "move" the data from the tail of
1212 * of the buffer to the new fragment when we split
1213 * the message.
1215 * FIXME: It may be fragmented into multiple chunks
1216 * at once if non-fragmentable extension headers
1217 * are too large.
1218 * --yoshfuji
1221 inet->cork.length += length;
1222 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1223 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1225 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1226 fragheaderlen, transhdrlen, mtu,
1227 flags);
1228 if (err)
1229 goto error;
1230 return 0;
1233 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1234 goto alloc_new_skb;
1236 while (length > 0) {
1237 /* Check if the remaining data fits into current packet. */
1238 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1239 if (copy < length)
1240 copy = maxfraglen - skb->len;
1242 if (copy <= 0) {
1243 char *data;
1244 unsigned int datalen;
1245 unsigned int fraglen;
1246 unsigned int fraggap;
1247 unsigned int alloclen;
1248 struct sk_buff *skb_prev;
1249 alloc_new_skb:
1250 skb_prev = skb;
1252 /* There's no room in the current skb */
1253 if (skb_prev)
1254 fraggap = skb_prev->len - maxfraglen;
1255 else
1256 fraggap = 0;
1259 * If remaining data exceeds the mtu,
1260 * we know we need more fragment(s).
1262 datalen = length + fraggap;
1263 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1264 datalen = maxfraglen - fragheaderlen;
1266 fraglen = datalen + fragheaderlen;
1267 if ((flags & MSG_MORE) &&
1268 !(rt->u.dst.dev->features&NETIF_F_SG))
1269 alloclen = mtu;
1270 else
1271 alloclen = datalen + fragheaderlen;
1274 * The last fragment gets additional space at tail.
1275 * Note: we overallocate on fragments with MSG_MORE
1276 * because we have no idea if we're the last one.
1278 if (datalen == length + fraggap)
1279 alloclen += rt->u.dst.trailer_len;
1282 * We just reserve space for fragment header.
1283 * Note: this may be overallocation if the message
1284 * (without MSG_MORE) fits into the MTU.
1286 alloclen += sizeof(struct frag_hdr);
1288 if (transhdrlen) {
1289 skb = sock_alloc_send_skb(sk,
1290 alloclen + hh_len,
1291 (flags & MSG_DONTWAIT), &err);
1292 } else {
1293 skb = NULL;
1294 if (atomic_read(&sk->sk_wmem_alloc) <=
1295 2 * sk->sk_sndbuf)
1296 skb = sock_wmalloc(sk,
1297 alloclen + hh_len, 1,
1298 sk->sk_allocation);
1299 if (unlikely(skb == NULL))
1300 err = -ENOBUFS;
1302 if (skb == NULL)
1303 goto error;
1305 * Fill in the control structures
1307 skb->ip_summed = csummode;
1308 skb->csum = 0;
1309 /* reserve for fragmentation */
1310 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1313 * Find where to start putting bytes
1315 data = skb_put(skb, fraglen);
1316 skb_set_network_header(skb, exthdrlen);
1317 data += fragheaderlen;
1318 skb->transport_header = (skb->network_header +
1319 fragheaderlen);
1320 if (fraggap) {
1321 skb->csum = skb_copy_and_csum_bits(
1322 skb_prev, maxfraglen,
1323 data + transhdrlen, fraggap, 0);
1324 skb_prev->csum = csum_sub(skb_prev->csum,
1325 skb->csum);
1326 data += fraggap;
1327 pskb_trim_unique(skb_prev, maxfraglen);
1329 copy = datalen - transhdrlen - fraggap;
1330 if (copy < 0) {
1331 err = -EINVAL;
1332 kfree_skb(skb);
1333 goto error;
1334 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1335 err = -EFAULT;
1336 kfree_skb(skb);
1337 goto error;
1340 offset += copy;
1341 length -= datalen - fraggap;
1342 transhdrlen = 0;
1343 exthdrlen = 0;
1344 csummode = CHECKSUM_NONE;
1347 * Put the packet on the pending queue
1349 __skb_queue_tail(&sk->sk_write_queue, skb);
1350 continue;
1353 if (copy > length)
1354 copy = length;
1356 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1357 unsigned int off;
1359 off = skb->len;
1360 if (getfrag(from, skb_put(skb, copy),
1361 offset, copy, off, skb) < 0) {
1362 __skb_trim(skb, off);
1363 err = -EFAULT;
1364 goto error;
1366 } else {
1367 int i = skb_shinfo(skb)->nr_frags;
1368 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1369 struct page *page = sk->sk_sndmsg_page;
1370 int off = sk->sk_sndmsg_off;
1371 unsigned int left;
1373 if (page && (left = PAGE_SIZE - off) > 0) {
1374 if (copy >= left)
1375 copy = left;
1376 if (page != frag->page) {
1377 if (i == MAX_SKB_FRAGS) {
1378 err = -EMSGSIZE;
1379 goto error;
1381 get_page(page);
1382 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1383 frag = &skb_shinfo(skb)->frags[i];
1385 } else if(i < MAX_SKB_FRAGS) {
1386 if (copy > PAGE_SIZE)
1387 copy = PAGE_SIZE;
1388 page = alloc_pages(sk->sk_allocation, 0);
1389 if (page == NULL) {
1390 err = -ENOMEM;
1391 goto error;
1393 sk->sk_sndmsg_page = page;
1394 sk->sk_sndmsg_off = 0;
1396 skb_fill_page_desc(skb, i, page, 0, 0);
1397 frag = &skb_shinfo(skb)->frags[i];
1398 } else {
1399 err = -EMSGSIZE;
1400 goto error;
1402 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1403 err = -EFAULT;
1404 goto error;
1406 sk->sk_sndmsg_off += copy;
1407 frag->size += copy;
1408 skb->len += copy;
1409 skb->data_len += copy;
1410 skb->truesize += copy;
1411 atomic_add(copy, &sk->sk_wmem_alloc);
1413 offset += copy;
1414 length -= copy;
1416 return 0;
1417 error:
1418 inet->cork.length -= length;
1419 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1420 return err;
1423 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1425 if (np->cork.opt) {
1426 kfree(np->cork.opt->dst0opt);
1427 kfree(np->cork.opt->dst1opt);
1428 kfree(np->cork.opt->hopopt);
1429 kfree(np->cork.opt->srcrt);
1430 kfree(np->cork.opt);
1431 np->cork.opt = NULL;
1434 if (inet->cork.dst) {
1435 dst_release(inet->cork.dst);
1436 inet->cork.dst = NULL;
1437 inet->cork.flags &= ~IPCORK_ALLFRAG;
1439 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1442 int ip6_push_pending_frames(struct sock *sk)
1444 struct sk_buff *skb, *tmp_skb;
1445 struct sk_buff **tail_skb;
1446 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1447 struct inet_sock *inet = inet_sk(sk);
1448 struct ipv6_pinfo *np = inet6_sk(sk);
1449 struct net *net = sock_net(sk);
1450 struct ipv6hdr *hdr;
1451 struct ipv6_txoptions *opt = np->cork.opt;
1452 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1453 struct flowi *fl = &inet->cork.fl;
1454 unsigned char proto = fl->proto;
1455 int err = 0;
1457 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1458 goto out;
1459 tail_skb = &(skb_shinfo(skb)->frag_list);
1461 /* move skb->data to ip header from ext header */
1462 if (skb->data < skb_network_header(skb))
1463 __skb_pull(skb, skb_network_offset(skb));
1464 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1465 __skb_pull(tmp_skb, skb_network_header_len(skb));
1466 *tail_skb = tmp_skb;
1467 tail_skb = &(tmp_skb->next);
1468 skb->len += tmp_skb->len;
1469 skb->data_len += tmp_skb->len;
1470 skb->truesize += tmp_skb->truesize;
1471 tmp_skb->destructor = NULL;
1472 tmp_skb->sk = NULL;
1475 /* Allow local fragmentation. */
1476 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1477 skb->local_df = 1;
1479 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1480 __skb_pull(skb, skb_network_header_len(skb));
1481 if (opt && opt->opt_flen)
1482 ipv6_push_frag_opts(skb, opt, &proto);
1483 if (opt && opt->opt_nflen)
1484 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1486 skb_push(skb, sizeof(struct ipv6hdr));
1487 skb_reset_network_header(skb);
1488 hdr = ipv6_hdr(skb);
1490 *(__be32*)hdr = fl->fl6_flowlabel |
1491 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1493 hdr->hop_limit = np->cork.hop_limit;
1494 hdr->nexthdr = proto;
1495 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1496 ipv6_addr_copy(&hdr->daddr, final_dst);
1498 skb->priority = sk->sk_priority;
1499 skb->mark = sk->sk_mark;
1501 skb_dst_set(skb, dst_clone(&rt->u.dst));
1502 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1503 if (proto == IPPROTO_ICMPV6) {
1504 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1506 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1507 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1510 err = ip6_local_out(skb);
1511 if (err) {
1512 if (err > 0)
1513 err = net_xmit_errno(err);
1514 if (err)
1515 goto error;
1518 out:
1519 ip6_cork_release(inet, np);
1520 return err;
1521 error:
1522 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1523 goto out;
1526 void ip6_flush_pending_frames(struct sock *sk)
1528 struct sk_buff *skb;
1530 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1531 if (skb_dst(skb))
1532 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1533 IPSTATS_MIB_OUTDISCARDS);
1534 kfree_skb(skb);
1537 ip6_cork_release(inet_sk(sk), inet6_sk(sk));