/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
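
/*
 * Fix up the IPv6 payload length (zeroed when it would exceed
 * IPV6_MAXPLEN, as is done for jumbograms) and run the packet through
 * the LOCAL_OUT netfilter hook.  nf_hook() returns 1 when the caller
 * may transmit the packet itself, which ip6_local_out() turns into a
 * call to dst_output().
 */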
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
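
/*
 * Last step of output: use the cached hardware header if the route has
 * one, otherwise ask the neighbour entry to resolve and transmit.  With
 * neither present there is nowhere to send the packet, so count it as
 * "no route" and drop it.
 */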
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx(newskb);
	return 0;
}
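
/*
 * For multicast destinations, a clone may have to be looped back to
 * local listeners through the POST_ROUTING hook before the original
 * goes out: either a multicast routing socket wants to see packets
 * that were not forwarded, or an interface has joined the destination
 * group.  A hop limit of zero means the packet must never reach the
 * wire.
 */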
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev)) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}
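
/*
 * MTU to fragment against: the device MTU when the socket does its own
 * path MTU probing (IPV6_PMTUDISC_PROBE), otherwise the route's MTU.
 */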
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}
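
/*
 * Entry point from dst_output(): discard everything if IPv6 is
 * administratively disabled on the egress device, fragment when the
 * packet exceeds the path MTU (unless it is GSO) or when the route
 * requires fragmenting every packet (dst_allfrag), then hand the
 * result to ip6_output2().
 */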
int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
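
/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value.  Each matching socket except
 * the last gets a clone; the last one consumes the original skb, in
 * which case 1 is returned and the caller must not touch skb again.
 */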
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
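
/*
 * Called for packets whose destination we proxy neighbour discovery
 * for.  Returns 1 if the packet is an ND message that must be handed
 * to local input, -1 if it has to be discarded (link-local
 * destination), and 0 to let normal forwarding proceed.
 */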
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* A unicast neighbour discovery message destined
			 * to the proxied address must be passed to the
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
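
/*
 * ip6_forward_finish() hands the packet to dst_output() once the
 * FORWARD netfilter hook has accepted it.  ip6_forward() below does
 * the real work: forwarding and xfrm policy checks, Router Alert
 * delivery, hop limit handling, NDP proxying, optional redirects back
 * to the sender, and MTU enforcement.
 */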
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on RA packets;
	 *	we push them to user level AS IS without any
	 *	warranty that the application will be able to
	 *	interpret them. The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not an end node, so if the packet contains
	 *	AH/ESP we cannot do anything with it.
	 *	Defragmentation would also be a mistake: RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
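
/*
 * Copy the per-packet metadata that every fragment has to carry from
 * the original skb: packet type, priority, protocol, route, device,
 * mark, traffic-control index, netfilter state and security mark.
 */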
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
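
/*
 * Find the offset at which the Fragment header has to be inserted:
 * after the extension headers that belong to the unfragmentable part
 * (Hop-by-Hop, Routing, and Destination Options that precede a Routing
 * header or carry a Home Address option).  *nexthdr is left pointing
 * at the nexthdr byte that will be overwritten with NEXTHDR_FRAGMENT.
 */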
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
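
/*
 * Two strategies: if the skb already carries a well-formed frag_list
 * (all pieces 8-byte aligned, unshared, with headroom for the headers),
 * each list member becomes a fragment in place ("fast path");
 * otherwise fall through to the slow path, which allocates a fresh skb
 * per fragment and copies the data out of the original.
 */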
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frags(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
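
/*
 * A cached route is considered stale when it is a host route for some
 * other destination, or a network route whose last-used destination
 * cache does not match the flow.  ip6_sk_dst_check() additionally
 * requires the flow's output interface to match the cached route's
 * device.
 */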
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because the main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
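
/*
 * UFO path for ip6_append_data(): instead of queueing one skb per
 * fragment, build a single oversized skb and let the device (or the
 * software GSO layer) split it on gso_size boundaries at transmit
 * time.
 */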
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
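
/*
 * Append data to an existing or newly started corked packet.  The
 * first call on an empty write queue sets up the cork state
 * (duplicated options, route, MTU and fragment size); subsequent calls
 * reuse it.  Data is packed into MTU-sized skbs, each pre-reserving
 * room for a Fragment header, so ip6_push_pending_frames() or
 * ip6_fragment() can finish the job without reshuffling.
 */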
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
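
/*
 * Splice every skb on the socket's write queue into one packet (the
 * tail skbs become the frag_list of the first), push the extension
 * headers and the IPv6 header saved in the cork, and send the result
 * through ip6_local_out().
 */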
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}