/*
 *	net/ipv6/ip6_output.c
 *
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

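/*
 * Fill in the IPv6 payload length and run the netfilter LOCAL_OUT hook.
 * nf_hook() returns 1 when the packet is accepted without the okfn having
 * been invoked, which is why ip6_local_out() below treats a return value
 * of 1 as "go ahead and call dst_output()".
 */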
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

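/*
 * Final transmit step: use the cached hardware header if the dst has one,
 * otherwise hand the packet to the neighbour's output routine. With
 * neither available there is no way to build a link-layer header, so the
 * packet is counted as OUTNOROUTES and dropped.
 */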
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx(newskb);
	return 0;
}

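/*
 * Post-routing output. For multicast destinations a clone may be looped
 * back through ip6_dev_loopback_xmit(): either a multicast-routing socket
 * wants to see locally originated packets, or a local listener has joined
 * the group (and the sending socket left IPV6_MULTICAST_LOOP enabled).
 */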
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket(dev_net(dev)) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}

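/*
 * dst_output() entry point for locally generated packets: fragment when
 * the packet exceeds the path MTU and is not GSO, or when the route
 * requires fragmentation on all frames (dst_allfrag).
 */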
int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

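/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered for this alert value (ip6_ra_chain). The last match
 * consumes the original skb; earlier matches receive clones. Returns 1
 * if the packet was delivered, telling ip6_forward() to stop.
 */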
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

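/*
 * Decide what to do with a packet that matched a proxy ND entry:
 * returns 1 to deliver it locally (unicast NDISC messages for the
 * proxied address), -1 to drop it (link-local destination), and 0 to
 * forward it normally.
 */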
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

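/*
 * Forwarding path. In order: check that forwarding is enabled, apply the
 * FWD xfrm policy, hand Router Alert packets to ip6_call_ra_chain(),
 * enforce the hop limit (ICMPv6 time exceeded), handle proxy ND, validate
 * the source address before sending a redirect (security critical),
 * enforce the outgoing MTU (ICMPv6 packet too big), and finally decrement
 * hop_limit after skb_cow() and pass the packet to NF_INET_FORWARD.
 */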
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT do any processing on RA packets, pushing them to
	 *	user level AS IS without any warranty that the application
	 *	will be able to interpret them. The reason is that we
	 *	cannot do anything clever here.
	 *
	 *	We are not an end-node, so if the packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

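/*
 * Find the offset at which a Fragment header must be inserted: it goes
 * after the headers that belong to the unfragmentable part (hop-by-hop,
 * routing, and a destination options header that precedes a routing
 * header), following the RFC 2460 extension header ordering rules.
 * *nexthdr is left pointing at the nexthdr byte that the caller patches
 * to NEXTHDR_FRAGMENT.
 */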
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

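/*
 * Fragment an over-sized packet. Two strategies: a fast path that reuses
 * an existing, well-formed frag_list (every piece already 8-byte aligned
 * with headroom for the headers) by prepending a copy of the
 * unfragmentable headers plus a Fragment header to each piece, and a
 * slow path that allocates fresh skbs and copies the payload out in
 * MTU-sized, 8-byte-aligned blocks.
 */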
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frags(skb)) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

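/*
 * Common tail of the dst lookup helpers: resolve the route if the caller
 * did not supply one, then pick a source address when the flow leaves it
 * unspecified (and, with optimistic DAD, fall back to the default
 * router's dst entry when the neighbour is not yet valid).
 */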
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

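/*
 * Append data to the pending send queue of a corked socket. On the first
 * call (empty write queue) the cork state is set up: the txoptions are
 * duplicated, the route and flow are pinned, and the fragment size is
 * fixed. Later calls reuse that state. Datagram protocols drive it
 * roughly like this (a sketch; see udpv6_sendmsg for the real sequence):
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, ...);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!corked)
 *		err = ip6_push_pending_frames(sk);
 */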
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

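/*
 * Release everything pinned by the cork: the duplicated txoptions, the
 * held route, and the saved flow.
 */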
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

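/*
 * Glue the queued skbs into one packet (the tail skbs become the
 * frag_list of the first), prepend the extension headers and the IPv6
 * header from the cork state, then send via ip6_local_out(). The cork
 * is released on both success and error.
 */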
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

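/* Drop everything still sitting on the write queue and uncork. */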
void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}