/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
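/*
 * Fill in the payload length and run the netfilter LOCAL_OUT hook.
 * A payload_len of 0 on a packet longer than IPV6_MAXPLEN is the
 * jumbogram convention (RFC 2675); the real length then lives in a
 * Jumbo Payload hop-by-hop option.
 */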
int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}
int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
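/*
 * Hand the packet to the link layer: use the cached hardware header
 * (dst->hh) when one exists, otherwise go through the neighbour entry,
 * which may still need to resolve the link-layer address. With neither
 * available there is no way to emit the frame, so count it as a
 * no-route discard.
 */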
static int ip6_output_finish(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst->hh)
                return neigh_hh_output(dst->hh, skb);
        else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(newskb));

        netif_rx_ni(newskb);
        return 0;
}
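/*
 * For multicast destinations, a clone is looped back through the
 * POST_ROUTING hook to ip6_dev_loopback_xmit() whenever local listeners
 * (or a multicast routing socket) may want the packet; the original
 * then continues out of the device as usual.
 */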
static int ip6_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev)) &&
                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
                                        NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
                       ip6_output_finish);
}
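/*
 * With IPV6_PMTUDISC_PROBE the socket asks to bypass the cached path
 * MTU and use the raw device MTU instead, so that it can probe for a
 * larger path MTU itself.
 */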
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
               skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}
int ip6_output(struct sk_buff *skb)
{
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_output2);
        else
                return ip6_output2(skb);
}
/*
 *	xmit an sk_buff (used by TCP)
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl->proto;
        int seg_len = skb->len;
        int hlimit = -1;
        int tclass = 0;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                 * MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        kfree_skb(skb);
                        skb = skb2;
                        if (sk)
                                skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /* Allow local fragmentation. */
        if (ipfragok)
                skb->local_df = 1;

        /*
         *	Fill in the IPv6 header
         */
        if (np) {
                tclass = np->tclass;
                hlimit = np->hop_limit;
        }
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);
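        /* The first 32-bit word of the header packs the version (6) in
         * the top nibble, the 8-bit traffic class in bits 27-20, and the
         * 20-bit flow label in the low bits; fl6_flowlabel is already in
         * network byte order, hence the plain OR after htonl().
         */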
        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
                               dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It is code duplication, but we really want to avoid
 *	extra checks, since ipv6_build_header is used by TCP (which
 *	is performance critical for us).
 */
int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}
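/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered interest in this alert value (the IPV6_ROUTER_ALERT
 * sockopt populates ip6_ra_chain). Returns 1 if the packet was consumed
 * by at least one socket.
 */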
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}
static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}
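/*
 * Forwarding path: verify that forwarding is enabled and policy allows
 * the packet, hand Router Alert packets to interested sockets, enforce
 * the hop limit, honour NDP proxying, decide whether a redirect should
 * be sent, enforce the path MTU, and only then decrement hop_limit
 * (after skb_cow(), so a shared header is never modified) and re-queue
 * the packet through the FORWARD netfilter hook.
 */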
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *	We do not process Router Alert (RA) packets;
         *	we push them to user level AS IS, without any
         *	warranty that the application will be able
         *	to interpret them. The reason is that we
         *	cannot do anything clever here.
         *
         *	We are not an end node, so if the packet
         *	contains AH/ESP, we cannot do anything.
         *	Defragmentation would also be a mistake: RA packets
         *	cannot be fragmented, because there is no guarantee
         *	that different fragments will follow one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }
        /*
         *	check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
         * send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
            !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *	incoming and outgoing devices are the same
                 *	send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr *)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}
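/*
 * Per RFC 2460, the Fragment header must be inserted after the
 * "unfragmentable part": hop-by-hop options, a routing header, and any
 * destination options destined for intermediate nodes (i.e. those
 * preceding a routing header). This helper walks the extension header
 * chain and reports that insertion offset, leaving *nexthdr pointing at
 * the next-header field that will be rewritten to NEXTHDR_FRAGMENT.
 */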
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}
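/*
 * Two strategies below: a fast path that reuses an already-built
 * frag_list (each element becomes one fragment after receiving its own
 * copy of the unfragmentable headers), and a slow path that allocates
 * fresh skbs and copies the payload out in 8-octet-aligned chunks.
 */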
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (!skb->local_df) {
                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);
        if (skb_has_frags(skb)) {
                int first_len = skb_pagelen(skb);
                int truesizes = 0;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                                truesizes += frag->truesize;
                        }
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->truesize -= truesizes;
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->u.dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->u.dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->u.dst);
                return err;
        }
slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *	Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *	Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *	Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      LL_ALLOCATED_SPACE(rt->u.dst.dev),
                                      GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *	Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *	Charge the memory for the fragment to any owner
                 *	it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *	Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *	Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *	Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *	Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        kfree_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}
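/*
 * Returns nonzero when the cached route can no longer be trusted for
 * this flow: the key is not a matching host route (/128) and the flow
 * destination also differs from the last address the route was used
 * for (addr_cache).
 */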
static inline int ip6_rt_check(struct rt6key *rt_key,
                               struct in6_addr *fl_addr,
                               struct in6_addr *addr_cache)
{
        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          struct flowi *fl)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the unconnected case
         * is not very simple. Take into account that we do not
         * support routing by source, TOS, and MSG_DONTROUTE
         *						--ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route,
         *    check that the cached destination is current.
         *    If it is a network route, we still may check
         *    its validity using the saved pointer to the
         *    last used address: daddr_cache.
         *    We do not want to save the whole address now
         *    (because the main consumer of this service is
         *    TCP, which does not have this problem), so the
         *    last trick works only on connected sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
            (fl->oif && fl->oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;
        struct net *net = sock_net(sk);

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
                                         &fl->fl6_dst,
                                         sk ? inet6_sk(sk)->srcprefs : 0,
                                         &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi fl_gw;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw, fl, sizeof(struct flowi));
                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        if (sk) {
                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
                *dst = ip6_sk_dst_check(sk, *dst, fl);
        }

        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
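/*
 * UDP fragmentation offload: build one oversized skb and let the device
 * (or the GSO layer) split it on transmit. The gso_size set below is
 * the per-fragment payload and must be a multiple of 8, since the
 * fragment header expresses offsets in 8-octet units.
 */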
static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                                    int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags)

{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by network
         * device, so create one single skb packet containing the
         * complete udp datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return -ENOMEM;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
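/*
 * ip6_append_data() implements corking: the first call on an empty
 * write queue snapshots the route, the extension headers and the
 * effective MTU into the cork state; subsequent calls only append
 * data. Nothing hits the wire until ip6_push_pending_frames() builds
 * the IPv6 header and transmits the queue.
 */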
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
        struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa*/
                }
                dst_hold(&rt->u.dst);
                inet->cork.dst = &rt->u.dst;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
                            rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = (struct rt6_info *)inet->cork.dst;
                fl = &inet->cork.fl;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
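        /* Largest length a non-final fragment skb may reach: round the
         * payload space (mtu minus per-fragment headers) down to a
         * multiple of 8, add the header length back, and leave room for
         * the fragment header itself.
         */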
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
                        return -EMSGSIZE;
                }
        }
        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
            (rt->u.dst.dev->features & NETIF_F_UFO)) {

                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
                                          fragheaderlen, transhdrlen, mtu,
                                          flags);
                if (err)
                        goto error;
                return 0;
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;
        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         *	Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        /*
                         *	Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }
                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.dst) {
                dst_release(inet->cork.dst);
                inet->cork.dst = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
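/*
 * Splice every skb still sitting on sk_write_queue into the frag_list
 * of the first one, prepend the extension headers and the IPv6 header,
 * and hand the result to ip6_local_out(); ip6_fragment() will re-split
 * it on the way out if needed.
 */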
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = fl->fl6_flowlabel |
                         htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->u.dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}
void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}