net/ipv6/ip6_output.c
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
44 #include <net/sock.h>
45 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 static u32 ipv6_fragmentation_id = 1;
63 static DEFINE_SPINLOCK(ip6_id_lock);
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
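/*
 * __ip6_local_out: fill in payload_len and run the packet through the
 * NF_INET_LOCAL_OUT netfilter hook; ip6_local_out() below additionally
 * calls dst_output() when the hook verdict lets the packet continue.
 */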
72 int __ip6_local_out(struct sk_buff *skb)
74 int len;
76 len = skb->len - sizeof(struct ipv6hdr);
77 if (len > IPV6_MAXPLEN)
78 len = 0;
79 ipv6_hdr(skb)->payload_len = htons(len);
81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82 dst_output);
85 int ip6_local_out(struct sk_buff *skb)
87 int err;
89 err = __ip6_local_out(skb);
90 if (likely(err == 1))
91 err = dst_output(skb);
93 return err;
95 EXPORT_SYMBOL_GPL(ip6_local_out);
97 static int ip6_output_finish(struct sk_buff *skb)
99 struct dst_entry *dst = skb->dst;
101 if (dst->hh)
102 return neigh_hh_output(dst->hh, skb);
103 else if (dst->neighbour)
104 return dst->neighbour->output(skb);
106 IP6_INC_STATS_BH(dev_net(dst->dev),
107 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
108 kfree_skb(skb);
109 return -EINVAL;
113 /* dev_loopback_xmit for use with netfilter. */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
116 skb_reset_mac_header(newskb);
117 __skb_pull(newskb, skb_network_offset(newskb));
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 WARN_ON(!newskb->dst);
122 netif_rx(newskb);
123 return 0;
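/*
 * ip6_output2: loop multicast packets back to local listeners when
 * required, update the output MIB counters, and hand the packet to the
 * NF_INET_POST_ROUTING hook with ip6_output_finish as the final
 * transmit step.
 */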
127 static int ip6_output2(struct sk_buff *skb)
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
132 skb->protocol = htons(ETH_P_IPV6);
133 skb->dev = dev;
135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
141 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
142 &ipv6_hdr(skb)->saddr))) {
143 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
145 /* Do not check for IFF_ALLMULTI; multicast routing
146 is not supported in any case.
147 */
148 if (newskb)
149 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
150 NULL, newskb->dev,
151 ip6_dev_loopback_xmit);
153 if (ipv6_hdr(skb)->hop_limit == 0) {
154 IP6_INC_STATS(dev_net(dev), idev,
155 IPSTATS_MIB_OUTDISCARDS);
156 kfree_skb(skb);
157 return 0;
161 IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
164 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
165 ip6_output_finish);
168 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
170 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
172 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
173 skb->dst->dev->mtu : dst_mtu(skb->dst);
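/*
 * ip6_output: discard the packet if IPv6 is administratively disabled
 * on the egress device; otherwise fragment oversized (non-GSO) packets
 * or packets on an allfrag route, and pass everything else straight to
 * ip6_output2().
 */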
176 int ip6_output(struct sk_buff *skb)
178 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
179 if (unlikely(idev->cnf.disable_ipv6)) {
180 IP6_INC_STATS(dev_net(skb->dst->dev), idev,
181 IPSTATS_MIB_OUTDISCARDS);
182 kfree_skb(skb);
183 return 0;
186 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
187 dst_allfrag(skb->dst))
188 return ip6_fragment(skb, ip6_output2);
189 else
190 return ip6_output2(skb);
193 /*
194 * xmit an sk_buff (used by TCP)
195 */
197 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
198 struct ipv6_txoptions *opt, int ipfragok)
200 struct net *net = sock_net(sk);
201 struct ipv6_pinfo *np = inet6_sk(sk);
202 struct in6_addr *first_hop = &fl->fl6_dst;
203 struct dst_entry *dst = skb->dst;
204 struct ipv6hdr *hdr;
205 u8 proto = fl->proto;
206 int seg_len = skb->len;
207 int hlimit, tclass;
208 u32 mtu;
210 if (opt) {
211 unsigned int head_room;
213 /* First: exthdrs may take lots of space (~8K for now)
214 MAX_HEADER is not enough.
215 */
216 head_room = opt->opt_nflen + opt->opt_flen;
217 seg_len += head_room;
218 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
220 if (skb_headroom(skb) < head_room) {
221 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
222 if (skb2 == NULL) {
223 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
224 IPSTATS_MIB_OUTDISCARDS);
225 kfree_skb(skb);
226 return -ENOBUFS;
228 kfree_skb(skb);
229 skb = skb2;
230 if (sk)
231 skb_set_owner_w(skb, sk);
233 if (opt->opt_flen)
234 ipv6_push_frag_opts(skb, opt, &proto);
235 if (opt->opt_nflen)
236 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
239 skb_push(skb, sizeof(struct ipv6hdr));
240 skb_reset_network_header(skb);
241 hdr = ipv6_hdr(skb);
243 /* Allow local fragmentation. */
244 if (ipfragok)
245 skb->local_df = 1;
247 /*
248 * Fill in the IPv6 header
249 */
251 hlimit = -1;
252 if (np)
253 hlimit = np->hop_limit;
254 if (hlimit < 0)
255 hlimit = ip6_dst_hoplimit(dst);
257 tclass = -1;
258 if (np)
259 tclass = np->tclass;
260 if (tclass < 0)
261 tclass = 0;
263 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
265 hdr->payload_len = htons(seg_len);
266 hdr->nexthdr = proto;
267 hdr->hop_limit = hlimit;
269 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
270 ipv6_addr_copy(&hdr->daddr, first_hop);
272 skb->priority = sk->sk_priority;
273 skb->mark = sk->sk_mark;
275 mtu = dst_mtu(dst);
276 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
277 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
278 IPSTATS_MIB_OUTREQUESTS);
279 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
280 dst_output);
283 if (net_ratelimit())
284 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
285 skb->dev = dst->dev;
286 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
287 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
288 kfree_skb(skb);
289 return -EMSGSIZE;
292 EXPORT_SYMBOL(ip6_xmit);
294 /*
295 * To avoid extra problems, ND packets are sent through this
296 * routine. It's code duplication, but I really want to avoid
297 * extra checks since ipv6_build_header is used by TCP (which
298 * is performance-critical for us).
299 */
301 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
302 const struct in6_addr *saddr, const struct in6_addr *daddr,
303 int proto, int len)
305 struct ipv6_pinfo *np = inet6_sk(sk);
306 struct ipv6hdr *hdr;
307 int totlen;
309 skb->protocol = htons(ETH_P_IPV6);
310 skb->dev = dev;
312 totlen = len + sizeof(struct ipv6hdr);
314 skb_reset_network_header(skb);
315 skb_put(skb, sizeof(struct ipv6hdr));
316 hdr = ipv6_hdr(skb);
318 *(__be32*)hdr = htonl(0x60000000);
320 hdr->payload_len = htons(len);
321 hdr->nexthdr = proto;
322 hdr->hop_limit = np->hop_limit;
324 ipv6_addr_copy(&hdr->saddr, saddr);
325 ipv6_addr_copy(&hdr->daddr, daddr);
327 return 0;
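/*
 * ip6_call_ra_chain: deliver a packet carrying a Router Alert option to
 * every raw socket registered for that RA value, cloning the skb for
 * all but the last match. Returns 1 if the packet was consumed.
 */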
330 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
332 struct ip6_ra_chain *ra;
333 struct sock *last = NULL;
335 read_lock(&ip6_ra_lock);
336 for (ra = ip6_ra_chain; ra; ra = ra->next) {
337 struct sock *sk = ra->sk;
338 if (sk && ra->sel == sel &&
339 (!sk->sk_bound_dev_if ||
340 sk->sk_bound_dev_if == skb->dev->ifindex)) {
341 if (last) {
342 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
343 if (skb2)
344 rawv6_rcv(last, skb2);
346 last = sk;
350 if (last) {
351 rawv6_rcv(last, skb);
352 read_unlock(&ip6_ra_lock);
353 return 1;
355 read_unlock(&ip6_ra_lock);
356 return 0;
359 static int ip6_forward_proxy_check(struct sk_buff *skb)
361 struct ipv6hdr *hdr = ipv6_hdr(skb);
362 u8 nexthdr = hdr->nexthdr;
363 int offset;
365 if (ipv6_ext_hdr(nexthdr)) {
366 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
367 if (offset < 0)
368 return 0;
369 } else
370 offset = sizeof(struct ipv6hdr);
372 if (nexthdr == IPPROTO_ICMPV6) {
373 struct icmp6hdr *icmp6;
375 if (!pskb_may_pull(skb, (skb_network_header(skb) +
376 offset + 1 - skb->data)))
377 return 0;
379 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
381 switch (icmp6->icmp6_type) {
382 case NDISC_ROUTER_SOLICITATION:
383 case NDISC_ROUTER_ADVERTISEMENT:
384 case NDISC_NEIGHBOUR_SOLICITATION:
385 case NDISC_NEIGHBOUR_ADVERTISEMENT:
386 case NDISC_REDIRECT:
387 /* For reaction involving unicast neighbor discovery
388 * message destined to the proxied address, pass it to
389 * input function.
390 */
391 return 1;
392 default:
393 break;
397 /*
398 * The proxying router can't forward traffic sent to a link-local
399 * address, so signal the sender and discard the packet. This
400 * behavior is clarified by the MIPv6 specification.
401 */
402 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
403 dst_link_failure(skb);
404 return -1;
407 return 0;
410 static inline int ip6_forward_finish(struct sk_buff *skb)
412 return dst_output(skb);
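/*
 * ip6_forward: forwarding path for packets not addressed to this host.
 * It checks forwarding policy, hop limit, proxy ND, XFRM policy and MTU,
 * may emit ICMPv6 errors or redirects, and finally decrements the hop
 * limit before queueing the packet through the NF_INET_FORWARD hook.
 */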
415 int ip6_forward(struct sk_buff *skb)
417 struct dst_entry *dst = skb->dst;
418 struct ipv6hdr *hdr = ipv6_hdr(skb);
419 struct inet6_skb_parm *opt = IP6CB(skb);
420 struct net *net = dev_net(dst->dev);
422 if (net->ipv6.devconf_all->forwarding == 0)
423 goto error;
425 if (skb_warn_if_lro(skb))
426 goto drop;
428 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
429 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
430 goto drop;
433 skb_forward_csum(skb);
435 /*
436 * We DO NOT do any processing on
437 * RA packets, pushing them to user level AS IS
438 * without any WARRANTY that the application will be able
439 * to interpret them. The reason is that we
440 * cannot make anything clever here.
441 *
442 * We are not an end-node, so if the packet contains
443 * AH/ESP, we cannot do anything.
444 * Defragmentation would also be a mistake; RA packets
445 * cannot be fragmented, because there is no guarantee
446 * that different fragments will go along one path. --ANK
447 */
448 if (opt->ra) {
449 u8 *ptr = skb_network_header(skb) + opt->ra;
450 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
451 return 0;
454 /*
455 * check and decrement ttl
456 */
457 if (hdr->hop_limit <= 1) {
458 /* Force OUTPUT device used as source address */
459 skb->dev = dst->dev;
460 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
461 0, skb->dev);
462 IP6_INC_STATS_BH(net,
463 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
465 kfree_skb(skb);
466 return -ETIMEDOUT;
469 /* XXX: idev->cnf.proxy_ndp? */
470 if (net->ipv6.devconf_all->proxy_ndp &&
471 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
472 int proxied = ip6_forward_proxy_check(skb);
473 if (proxied > 0)
474 return ip6_input(skb);
475 else if (proxied < 0) {
476 IP6_INC_STATS(net, ip6_dst_idev(dst),
477 IPSTATS_MIB_INDISCARDS);
478 goto drop;
482 if (!xfrm6_route_forward(skb)) {
483 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
484 goto drop;
486 dst = skb->dst;
488 /* IPv6 specs say nothing about it, but it is clear that we cannot
489 send redirects to source routed frames.
490 We don't send redirects to frames decapsulated from IPsec.
491 */
492 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
493 !skb->sp) {
494 struct in6_addr *target = NULL;
495 struct rt6_info *rt;
496 struct neighbour *n = dst->neighbour;
498 /*
499 * incoming and outgoing devices are the same;
500 * send a redirect.
501 */
503 rt = (struct rt6_info *) dst;
504 if ((rt->rt6i_flags & RTF_GATEWAY))
505 target = (struct in6_addr*)&n->primary_key;
506 else
507 target = &hdr->daddr;
509 /* Limit redirects both by destination (here)
510 and by source (inside ndisc_send_redirect)
511 */
512 if (xrlim_allow(dst, 1*HZ))
513 ndisc_send_redirect(skb, n, target);
514 } else {
515 int addrtype = ipv6_addr_type(&hdr->saddr);
517 /* This check is security critical. */
518 if (addrtype == IPV6_ADDR_ANY ||
519 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
520 goto error;
521 if (addrtype & IPV6_ADDR_LINKLOCAL) {
522 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
523 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
524 goto error;
528 if (skb->len > dst_mtu(dst)) {
529 /* Again, force OUTPUT device used as source address */
530 skb->dev = dst->dev;
531 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
532 IP6_INC_STATS_BH(net,
533 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
534 IP6_INC_STATS_BH(net,
535 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
536 kfree_skb(skb);
537 return -EMSGSIZE;
540 if (skb_cow(skb, dst->dev->hard_header_len)) {
541 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
542 goto drop;
545 hdr = ipv6_hdr(skb);
547 /* Mangling hops number delayed to point after skb COW */
549 hdr->hop_limit--;
551 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
552 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
553 ip6_forward_finish);
555 error:
556 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
557 drop:
558 kfree_skb(skb);
559 return -EINVAL;
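/*
 * ip6_copy_metadata: propagate per-packet metadata (packet type,
 * priority, dst reference, device, mark, traffic-control index,
 * netfilter state and security mark) from the original skb to a
 * freshly built fragment.
 */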
562 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
564 to->pkt_type = from->pkt_type;
565 to->priority = from->priority;
566 to->protocol = from->protocol;
567 dst_release(to->dst);
568 to->dst = dst_clone(from->dst);
569 to->dev = from->dev;
570 to->mark = from->mark;
572 #ifdef CONFIG_NET_SCHED
573 to->tc_index = from->tc_index;
574 #endif
575 nf_copy(to, from);
576 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
577 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
578 to->nf_trace = from->nf_trace;
579 #endif
580 skb_copy_secmark(to, from);
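/*
 * ip6_find_1stfragopt: walk the extension header chain and return the
 * offset at which the fragment header has to be inserted, i.e. just
 * after the headers that belong to the unfragmentable part (hop-by-hop,
 * routing, and a destination options header seen before the routing
 * header). *nexthdr is left pointing at the next-header field to patch.
 */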
583 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
585 u16 offset = sizeof(struct ipv6hdr);
586 struct ipv6_opt_hdr *exthdr =
587 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
588 unsigned int packet_len = skb->tail - skb->network_header;
589 int found_rhdr = 0;
590 *nexthdr = &ipv6_hdr(skb)->nexthdr;
592 while (offset + 1 <= packet_len) {
594 switch (**nexthdr) {
596 case NEXTHDR_HOP:
597 break;
598 case NEXTHDR_ROUTING:
599 found_rhdr = 1;
600 break;
601 case NEXTHDR_DEST:
602 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
603 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
604 break;
605 #endif
606 if (found_rhdr)
607 return offset;
608 break;
609 default :
610 return offset;
613 offset += ipv6_optlen(exthdr);
614 *nexthdr = &exthdr->nexthdr;
615 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
616 offset);
619 return offset;
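/*
 * ip6_fragment: the fast path reuses an existing frag_list when every
 * fragment already has suitable length, alignment and headroom; the
 * slow path below allocates new skbs and copies the payload piece by
 * piece. Either way a fragment header carrying a shared identification
 * value is inserted after the unfragmentable part found by
 * ip6_find_1stfragopt().
 */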
622 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
624 struct sk_buff *frag;
625 struct rt6_info *rt = (struct rt6_info*)skb->dst;
626 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
627 struct ipv6hdr *tmp_hdr;
628 struct frag_hdr *fh;
629 unsigned int mtu, hlen, left, len;
630 __be32 frag_id = 0;
631 int ptr, offset = 0, err=0;
632 u8 *prevhdr, nexthdr = 0;
633 struct net *net = dev_net(skb->dst->dev);
635 hlen = ip6_find_1stfragopt(skb, &prevhdr);
636 nexthdr = *prevhdr;
638 mtu = ip6_skb_dst_mtu(skb);
640 /* We must not fragment if the socket is set to force MTU discovery
641 * or if the skb is not generated by a local socket. (This last
642 * check should be redundant, but it's free.)
643 */
644 if (!skb->local_df) {
645 skb->dev = skb->dst->dev;
646 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
647 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
648 IPSTATS_MIB_FRAGFAILS);
649 kfree_skb(skb);
650 return -EMSGSIZE;
653 if (np && np->frag_size < mtu) {
654 if (np->frag_size)
655 mtu = np->frag_size;
657 mtu -= hlen + sizeof(struct frag_hdr);
659 if (skb_shinfo(skb)->frag_list) {
660 int first_len = skb_pagelen(skb);
661 int truesizes = 0;
663 if (first_len - hlen > mtu ||
664 ((first_len - hlen) & 7) ||
665 skb_cloned(skb))
666 goto slow_path;
668 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
669 /* Correct geometry. */
670 if (frag->len > mtu ||
671 ((frag->len & 7) && frag->next) ||
672 skb_headroom(frag) < hlen)
673 goto slow_path;
675 /* Partially cloned skb? */
676 if (skb_shared(frag))
677 goto slow_path;
679 BUG_ON(frag->sk);
680 if (skb->sk) {
681 sock_hold(skb->sk);
682 frag->sk = skb->sk;
683 frag->destructor = sock_wfree;
684 truesizes += frag->truesize;
688 err = 0;
689 offset = 0;
690 frag = skb_shinfo(skb)->frag_list;
691 skb_shinfo(skb)->frag_list = NULL;
692 /* BUILD HEADER */
694 *prevhdr = NEXTHDR_FRAGMENT;
695 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
696 if (!tmp_hdr) {
697 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
698 IPSTATS_MIB_FRAGFAILS);
699 return -ENOMEM;
702 __skb_pull(skb, hlen);
703 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
704 __skb_push(skb, hlen);
705 skb_reset_network_header(skb);
706 memcpy(skb_network_header(skb), tmp_hdr, hlen);
708 ipv6_select_ident(skb, fh);
709 fh->nexthdr = nexthdr;
710 fh->reserved = 0;
711 fh->frag_off = htons(IP6_MF);
712 frag_id = fh->identification;
714 first_len = skb_pagelen(skb);
715 skb->data_len = first_len - skb_headlen(skb);
716 skb->truesize -= truesizes;
717 skb->len = first_len;
718 ipv6_hdr(skb)->payload_len = htons(first_len -
719 sizeof(struct ipv6hdr));
721 dst_hold(&rt->u.dst);
723 for (;;) {
724 /* Prepare header of the next frame,
725 * before previous one went down. */
726 if (frag) {
727 frag->ip_summed = CHECKSUM_NONE;
728 skb_reset_transport_header(frag);
729 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
730 __skb_push(frag, hlen);
731 skb_reset_network_header(frag);
732 memcpy(skb_network_header(frag), tmp_hdr,
733 hlen);
734 offset += skb->len - hlen - sizeof(struct frag_hdr);
735 fh->nexthdr = nexthdr;
736 fh->reserved = 0;
737 fh->frag_off = htons(offset);
738 if (frag->next != NULL)
739 fh->frag_off |= htons(IP6_MF);
740 fh->identification = frag_id;
741 ipv6_hdr(frag)->payload_len =
742 htons(frag->len -
743 sizeof(struct ipv6hdr));
744 ip6_copy_metadata(frag, skb);
747 err = output(skb);
748 if(!err)
749 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
750 IPSTATS_MIB_FRAGCREATES);
752 if (err || !frag)
753 break;
755 skb = frag;
756 frag = skb->next;
757 skb->next = NULL;
760 kfree(tmp_hdr);
762 if (err == 0) {
763 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
764 IPSTATS_MIB_FRAGOKS);
765 dst_release(&rt->u.dst);
766 return 0;
769 while (frag) {
770 skb = frag->next;
771 kfree_skb(frag);
772 frag = skb;
775 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
776 IPSTATS_MIB_FRAGFAILS);
777 dst_release(&rt->u.dst);
778 return err;
781 slow_path:
782 left = skb->len - hlen; /* Space per frame */
783 ptr = hlen; /* Where to start from */
786 * Fragment the datagram.
789 *prevhdr = NEXTHDR_FRAGMENT;
792 * Keep copying data until we run out.
794 while(left > 0) {
795 len = left;
796 /* IF: it doesn't fit, use 'mtu' - the data space left */
797 if (len > mtu)
798 len = mtu;
799 /* IF: we are not sending up to and including the packet end
800 then align the next start on an eight byte boundary */
801 if (len < left) {
802 len &= ~7;
805 * Allocate buffer.
808 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
809 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
810 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
811 IPSTATS_MIB_FRAGFAILS);
812 err = -ENOMEM;
813 goto fail;
817 * Set up data on packet
820 ip6_copy_metadata(frag, skb);
821 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
822 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
823 skb_reset_network_header(frag);
824 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
825 frag->transport_header = (frag->network_header + hlen +
826 sizeof(struct frag_hdr));
828 /*
829 * Charge the memory for the fragment to any owner
830 * it might possess
831 */
832 if (skb->sk)
833 skb_set_owner_w(frag, skb->sk);
836 * Copy the packet header into the new buffer.
838 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
841 * Build fragment header.
843 fh->nexthdr = nexthdr;
844 fh->reserved = 0;
845 if (!frag_id) {
846 ipv6_select_ident(skb, fh);
847 frag_id = fh->identification;
848 } else
849 fh->identification = frag_id;
852 * Copy a block of the IP datagram.
854 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
855 BUG();
856 left -= len;
858 fh->frag_off = htons(offset);
859 if (left > 0)
860 fh->frag_off |= htons(IP6_MF);
861 ipv6_hdr(frag)->payload_len = htons(frag->len -
862 sizeof(struct ipv6hdr));
864 ptr += len;
865 offset += len;
868 * Put this fragment into the sending queue.
870 err = output(frag);
871 if (err)
872 goto fail;
874 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
875 IPSTATS_MIB_FRAGCREATES);
877 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
878 IPSTATS_MIB_FRAGOKS);
879 kfree_skb(skb);
880 return err;
882 fail:
883 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
884 IPSTATS_MIB_FRAGFAILS);
885 kfree_skb(skb);
886 return err;
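/*
 * ip6_rt_check / ip6_sk_dst_check: decide whether a cached socket route
 * can still be used for this flow; a host route must match the flow
 * destination exactly, otherwise the cached daddr/saddr and the output
 * interface are compared.
 */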
889 static inline int ip6_rt_check(struct rt6key *rt_key,
890 struct in6_addr *fl_addr,
891 struct in6_addr *addr_cache)
893 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
894 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
897 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
898 struct dst_entry *dst,
899 struct flowi *fl)
901 struct ipv6_pinfo *np = inet6_sk(sk);
902 struct rt6_info *rt = (struct rt6_info *)dst;
904 if (!dst)
905 goto out;
907 /* Yes, checking route validity in the not-connected
908 * case is not very simple. Take into account
909 * that we do not support routing by source, TOS,
910 * and MSG_DONTROUTE --ANK (980726)
911 *
912 * 1. ip6_rt_check(): If route was a host route,
913 * check that the cached destination is current.
914 * If it is a network route, we still may
915 * check its validity using a saved pointer
916 * to the last used address: daddr_cache.
917 * We do not want to save the whole address now,
918 * (because the main consumer of this service
919 * is tcp, which does not have this problem),
920 * so the last trick works only on connected
921 * sockets.
922 * 2. oif also should be the same.
923 */
924 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
925 #ifdef CONFIG_IPV6_SUBTREES
926 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
927 #endif
928 (fl->oif && fl->oif != dst->dev->ifindex)) {
929 dst_release(dst);
930 dst = NULL;
933 out:
934 return dst;
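/*
 * ip6_dst_lookup_tail: do the actual route lookup, pick a source
 * address if the flow has none, and (with optimistic DAD) fall back to
 * the default router's dst entry when the next hop is unresolved and
 * the chosen source address is still optimistic.
 */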
937 static int ip6_dst_lookup_tail(struct sock *sk,
938 struct dst_entry **dst, struct flowi *fl)
940 int err;
941 struct net *net = sock_net(sk);
943 if (*dst == NULL)
944 *dst = ip6_route_output(net, sk, fl);
946 if ((err = (*dst)->error))
947 goto out_err_release;
949 if (ipv6_addr_any(&fl->fl6_src)) {
950 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
951 &fl->fl6_dst,
952 sk ? inet6_sk(sk)->srcprefs : 0,
953 &fl->fl6_src);
954 if (err)
955 goto out_err_release;
958 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
959 /*
960 * Here, if the dst entry we've looked up
961 * has a neighbour entry that is in the INCOMPLETE
962 * state and the src address from the flow is
963 * marked as OPTIMISTIC, we release the found
964 * dst entry and replace it instead with the
965 * dst entry of the nexthop router.
966 */
967 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
968 struct inet6_ifaddr *ifp;
969 struct flowi fl_gw;
970 int redirect;
972 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
973 (*dst)->dev, 1);
975 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
976 if (ifp)
977 in6_ifa_put(ifp);
979 if (redirect) {
980 /*
981 * We need to get the dst entry for the
982 * default router instead.
983 */
984 dst_release(*dst);
985 memcpy(&fl_gw, fl, sizeof(struct flowi));
986 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
987 *dst = ip6_route_output(net, sk, &fl_gw);
988 if ((err = (*dst)->error))
989 goto out_err_release;
992 #endif
994 return 0;
996 out_err_release:
997 if (err == -ENETUNREACH)
998 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
999 dst_release(*dst);
1000 *dst = NULL;
1001 return err;
1004 /**
1005 * ip6_dst_lookup - perform route lookup on flow
1006 * @sk: socket which provides route info
1007 * @dst: pointer to dst_entry * for result
1008 * @fl: flow to lookup
1009 *
1010 * This function performs a route lookup on the given flow.
1011 *
1012 * It returns zero on success, or a standard errno code on error.
1013 */
1014 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1016 *dst = NULL;
1017 return ip6_dst_lookup_tail(sk, dst, fl);
1019 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1021 /**
1022 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1023 * @sk: socket which provides the dst cache and route info
1024 * @dst: pointer to dst_entry * for result
1025 * @fl: flow to lookup
1026 *
1027 * This function performs a route lookup on the given flow with the
1028 * possibility of using the cached route in the socket if it is valid.
1029 * It will take the socket dst lock when operating on the dst cache.
1030 * As a result, this function can only be used in process context.
1031 *
1032 * It returns zero on success, or a standard errno code on error.
1033 */
1034 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1036 *dst = NULL;
1037 if (sk) {
1038 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1039 *dst = ip6_sk_dst_check(sk, *dst, fl);
1042 return ip6_dst_lookup_tail(sk, dst, fl);
1044 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
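/*
 * ip6_ufo_append_data: UDP fragmentation offload path; build one large
 * GSO skb with the payload in page fragments and let the device split
 * it into wire-sized fragments using the precomputed gso_size and the
 * fragment identification value selected here.
 */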
1046 static inline int ip6_ufo_append_data(struct sock *sk,
1047 int getfrag(void *from, char *to, int offset, int len,
1048 int odd, struct sk_buff *skb),
1049 void *from, int length, int hh_len, int fragheaderlen,
1050 int transhdrlen, int mtu,unsigned int flags)
1053 struct sk_buff *skb;
1054 int err;
1056 /* There is support for UDP large send offload by the network
1057 * device, so create one single skb packet containing the complete
1058 * udp datagram.
1059 */
1060 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1061 skb = sock_alloc_send_skb(sk,
1062 hh_len + fragheaderlen + transhdrlen + 20,
1063 (flags & MSG_DONTWAIT), &err);
1064 if (skb == NULL)
1065 return -ENOMEM;
1067 /* reserve space for Hardware header */
1068 skb_reserve(skb, hh_len);
1070 /* create space for UDP/IP header */
1071 skb_put(skb,fragheaderlen + transhdrlen);
1073 /* initialize network header pointer */
1074 skb_reset_network_header(skb);
1076 /* initialize protocol header pointer */
1077 skb->transport_header = skb->network_header + fragheaderlen;
1079 skb->ip_summed = CHECKSUM_PARTIAL;
1080 skb->csum = 0;
1081 sk->sk_sndmsg_off = 0;
1084 err = skb_append_datato_frags(sk,skb, getfrag, from,
1085 (length - transhdrlen));
1086 if (!err) {
1087 struct frag_hdr fhdr;
1089 /* specify the length of each IP datagram fragment */
1090 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1091 sizeof(struct frag_hdr);
1092 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1093 ipv6_select_ident(skb, &fhdr);
1094 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1095 __skb_queue_tail(&sk->sk_write_queue, skb);
1097 return 0;
1099 /* There is not enough support to do UDP LSO,
1100 * so follow the normal path.
1101 */
1102 kfree_skb(skb);
1104 return err;
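/*
 * ip6_opt_dup / ip6_rthdr_dup: duplicate an extension header for the
 * cork; the header length field counts 8-octet units excluding the
 * first, hence the (hdrlen + 1) * 8 byte copy.
 */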
1107 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1108 gfp_t gfp)
1110 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1113 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1114 gfp_t gfp)
1116 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
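/*
 * ip6_append_data: queue user data on sk->sk_write_queue, growing or
 * allocating skbs so that each one (plus headers) fits the path MTU or
 * the 8-byte-aligned fragment size; the first call also sets up the
 * cork state (options, route, hop limit, traffic class, fragment size).
 */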
1119 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1120 int offset, int len, int odd, struct sk_buff *skb),
1121 void *from, int length, int transhdrlen,
1122 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1123 struct rt6_info *rt, unsigned int flags)
1125 struct inet_sock *inet = inet_sk(sk);
1126 struct ipv6_pinfo *np = inet6_sk(sk);
1127 struct sk_buff *skb;
1128 unsigned int maxfraglen, fragheaderlen;
1129 int exthdrlen;
1130 int hh_len;
1131 int mtu;
1132 int copy;
1133 int err;
1134 int offset = 0;
1135 int csummode = CHECKSUM_NONE;
1137 if (flags&MSG_PROBE)
1138 return 0;
1139 if (skb_queue_empty(&sk->sk_write_queue)) {
1141 * setup for corking
1143 if (opt) {
1144 if (WARN_ON(np->cork.opt))
1145 return -EINVAL;
1147 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1148 if (unlikely(np->cork.opt == NULL))
1149 return -ENOBUFS;
1151 np->cork.opt->tot_len = opt->tot_len;
1152 np->cork.opt->opt_flen = opt->opt_flen;
1153 np->cork.opt->opt_nflen = opt->opt_nflen;
1155 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1156 sk->sk_allocation);
1157 if (opt->dst0opt && !np->cork.opt->dst0opt)
1158 return -ENOBUFS;
1160 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1161 sk->sk_allocation);
1162 if (opt->dst1opt && !np->cork.opt->dst1opt)
1163 return -ENOBUFS;
1165 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1166 sk->sk_allocation);
1167 if (opt->hopopt && !np->cork.opt->hopopt)
1168 return -ENOBUFS;
1170 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1171 sk->sk_allocation);
1172 if (opt->srcrt && !np->cork.opt->srcrt)
1173 return -ENOBUFS;
1175 /* need source address above --miyazawa */
1177 dst_hold(&rt->u.dst);
1178 inet->cork.dst = &rt->u.dst;
1179 inet->cork.fl = *fl;
1180 np->cork.hop_limit = hlimit;
1181 np->cork.tclass = tclass;
1182 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1183 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1184 if (np->frag_size < mtu) {
1185 if (np->frag_size)
1186 mtu = np->frag_size;
1188 inet->cork.fragsize = mtu;
1189 if (dst_allfrag(rt->u.dst.path))
1190 inet->cork.flags |= IPCORK_ALLFRAG;
1191 inet->cork.length = 0;
1192 sk->sk_sndmsg_page = NULL;
1193 sk->sk_sndmsg_off = 0;
1194 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1195 rt->rt6i_nfheader_len;
1196 length += exthdrlen;
1197 transhdrlen += exthdrlen;
1198 } else {
1199 rt = (struct rt6_info *)inet->cork.dst;
1200 fl = &inet->cork.fl;
1201 opt = np->cork.opt;
1202 transhdrlen = 0;
1203 exthdrlen = 0;
1204 mtu = inet->cork.fragsize;
1207 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1209 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1210 (opt ? opt->opt_nflen : 0);
1211 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1213 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1214 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1215 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1216 return -EMSGSIZE;
1220 /*
1221 * Let's try using as much space as possible.
1222 * Use MTU if total length of the message fits into the MTU.
1223 * Otherwise, we need to reserve fragment header and
1224 * fragment alignment (= 8-15 octets, in total).
1225 *
1226 * Note that we may need to "move" the data from the tail
1227 * of the buffer to the new fragment when we split
1228 * the message.
1229 *
1230 * FIXME: It may be fragmented into multiple chunks
1231 * at once if non-fragmentable extension headers
1232 * are too large.
1233 * --yoshfuji
1234 */
1236 inet->cork.length += length;
1237 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1238 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1240 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1241 fragheaderlen, transhdrlen, mtu,
1242 flags);
1243 if (err)
1244 goto error;
1245 return 0;
1248 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1249 goto alloc_new_skb;
1251 while (length > 0) {
1252 /* Check if the remaining data fits into current packet. */
1253 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1254 if (copy < length)
1255 copy = maxfraglen - skb->len;
1257 if (copy <= 0) {
1258 char *data;
1259 unsigned int datalen;
1260 unsigned int fraglen;
1261 unsigned int fraggap;
1262 unsigned int alloclen;
1263 struct sk_buff *skb_prev;
1264 alloc_new_skb:
1265 skb_prev = skb;
1267 /* There's no room in the current skb */
1268 if (skb_prev)
1269 fraggap = skb_prev->len - maxfraglen;
1270 else
1271 fraggap = 0;
1274 * If remaining data exceeds the mtu,
1275 * we know we need more fragment(s).
1277 datalen = length + fraggap;
1278 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1279 datalen = maxfraglen - fragheaderlen;
1281 fraglen = datalen + fragheaderlen;
1282 if ((flags & MSG_MORE) &&
1283 !(rt->u.dst.dev->features&NETIF_F_SG))
1284 alloclen = mtu;
1285 else
1286 alloclen = datalen + fragheaderlen;
1288 /*
1289 * The last fragment gets additional space at tail.
1290 * Note: we overallocate on fragments with MSG_MORE
1291 * because we have no idea if we're the last one.
1292 */
1293 if (datalen == length + fraggap)
1294 alloclen += rt->u.dst.trailer_len;
1296 /*
1297 * We just reserve space for fragment header.
1298 * Note: this may be overallocation if the message
1299 * (without MSG_MORE) fits into the MTU.
1300 */
1301 alloclen += sizeof(struct frag_hdr);
1303 if (transhdrlen) {
1304 skb = sock_alloc_send_skb(sk,
1305 alloclen + hh_len,
1306 (flags & MSG_DONTWAIT), &err);
1307 } else {
1308 skb = NULL;
1309 if (atomic_read(&sk->sk_wmem_alloc) <=
1310 2 * sk->sk_sndbuf)
1311 skb = sock_wmalloc(sk,
1312 alloclen + hh_len, 1,
1313 sk->sk_allocation);
1314 if (unlikely(skb == NULL))
1315 err = -ENOBUFS;
1317 if (skb == NULL)
1318 goto error;
1320 * Fill in the control structures
1322 skb->ip_summed = csummode;
1323 skb->csum = 0;
1324 /* reserve for fragmentation */
1325 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1328 * Find where to start putting bytes
1330 data = skb_put(skb, fraglen);
1331 skb_set_network_header(skb, exthdrlen);
1332 data += fragheaderlen;
1333 skb->transport_header = (skb->network_header +
1334 fragheaderlen);
1335 if (fraggap) {
1336 skb->csum = skb_copy_and_csum_bits(
1337 skb_prev, maxfraglen,
1338 data + transhdrlen, fraggap, 0);
1339 skb_prev->csum = csum_sub(skb_prev->csum,
1340 skb->csum);
1341 data += fraggap;
1342 pskb_trim_unique(skb_prev, maxfraglen);
1344 copy = datalen - transhdrlen - fraggap;
1345 if (copy < 0) {
1346 err = -EINVAL;
1347 kfree_skb(skb);
1348 goto error;
1349 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1350 err = -EFAULT;
1351 kfree_skb(skb);
1352 goto error;
1355 offset += copy;
1356 length -= datalen - fraggap;
1357 transhdrlen = 0;
1358 exthdrlen = 0;
1359 csummode = CHECKSUM_NONE;
1362 * Put the packet on the pending queue
1364 __skb_queue_tail(&sk->sk_write_queue, skb);
1365 continue;
1368 if (copy > length)
1369 copy = length;
1371 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1372 unsigned int off;
1374 off = skb->len;
1375 if (getfrag(from, skb_put(skb, copy),
1376 offset, copy, off, skb) < 0) {
1377 __skb_trim(skb, off);
1378 err = -EFAULT;
1379 goto error;
1381 } else {
1382 int i = skb_shinfo(skb)->nr_frags;
1383 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1384 struct page *page = sk->sk_sndmsg_page;
1385 int off = sk->sk_sndmsg_off;
1386 unsigned int left;
1388 if (page && (left = PAGE_SIZE - off) > 0) {
1389 if (copy >= left)
1390 copy = left;
1391 if (page != frag->page) {
1392 if (i == MAX_SKB_FRAGS) {
1393 err = -EMSGSIZE;
1394 goto error;
1396 get_page(page);
1397 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1398 frag = &skb_shinfo(skb)->frags[i];
1400 } else if(i < MAX_SKB_FRAGS) {
1401 if (copy > PAGE_SIZE)
1402 copy = PAGE_SIZE;
1403 page = alloc_pages(sk->sk_allocation, 0);
1404 if (page == NULL) {
1405 err = -ENOMEM;
1406 goto error;
1408 sk->sk_sndmsg_page = page;
1409 sk->sk_sndmsg_off = 0;
1411 skb_fill_page_desc(skb, i, page, 0, 0);
1412 frag = &skb_shinfo(skb)->frags[i];
1413 } else {
1414 err = -EMSGSIZE;
1415 goto error;
1417 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1418 err = -EFAULT;
1419 goto error;
1421 sk->sk_sndmsg_off += copy;
1422 frag->size += copy;
1423 skb->len += copy;
1424 skb->data_len += copy;
1425 skb->truesize += copy;
1426 atomic_add(copy, &sk->sk_wmem_alloc);
1428 offset += copy;
1429 length -= copy;
1431 return 0;
1432 error:
1433 inet->cork.length -= length;
1434 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1435 return err;
1438 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1440 if (np->cork.opt) {
1441 kfree(np->cork.opt->dst0opt);
1442 kfree(np->cork.opt->dst1opt);
1443 kfree(np->cork.opt->hopopt);
1444 kfree(np->cork.opt->srcrt);
1445 kfree(np->cork.opt);
1446 np->cork.opt = NULL;
1449 if (inet->cork.dst) {
1450 dst_release(inet->cork.dst);
1451 inet->cork.dst = NULL;
1452 inet->cork.flags &= ~IPCORK_ALLFRAG;
1454 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
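/*
 * ip6_push_pending_frames: splice every skb queued by ip6_append_data()
 * into one packet (extra skbs become the frag_list), prepend the
 * extension headers and the IPv6 header, and send the result with
 * ip6_local_out().
 */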
1457 int ip6_push_pending_frames(struct sock *sk)
1459 struct sk_buff *skb, *tmp_skb;
1460 struct sk_buff **tail_skb;
1461 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1462 struct inet_sock *inet = inet_sk(sk);
1463 struct ipv6_pinfo *np = inet6_sk(sk);
1464 struct net *net = sock_net(sk);
1465 struct ipv6hdr *hdr;
1466 struct ipv6_txoptions *opt = np->cork.opt;
1467 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1468 struct flowi *fl = &inet->cork.fl;
1469 unsigned char proto = fl->proto;
1470 int err = 0;
1472 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1473 goto out;
1474 tail_skb = &(skb_shinfo(skb)->frag_list);
1476 /* move skb->data to ip header from ext header */
1477 if (skb->data < skb_network_header(skb))
1478 __skb_pull(skb, skb_network_offset(skb));
1479 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1480 __skb_pull(tmp_skb, skb_network_header_len(skb));
1481 *tail_skb = tmp_skb;
1482 tail_skb = &(tmp_skb->next);
1483 skb->len += tmp_skb->len;
1484 skb->data_len += tmp_skb->len;
1485 skb->truesize += tmp_skb->truesize;
1486 __sock_put(tmp_skb->sk);
1487 tmp_skb->destructor = NULL;
1488 tmp_skb->sk = NULL;
1491 /* Allow local fragmentation. */
1492 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1493 skb->local_df = 1;
1495 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1496 __skb_pull(skb, skb_network_header_len(skb));
1497 if (opt && opt->opt_flen)
1498 ipv6_push_frag_opts(skb, opt, &proto);
1499 if (opt && opt->opt_nflen)
1500 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1502 skb_push(skb, sizeof(struct ipv6hdr));
1503 skb_reset_network_header(skb);
1504 hdr = ipv6_hdr(skb);
1506 *(__be32*)hdr = fl->fl6_flowlabel |
1507 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1509 hdr->hop_limit = np->cork.hop_limit;
1510 hdr->nexthdr = proto;
1511 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1512 ipv6_addr_copy(&hdr->daddr, final_dst);
1514 skb->priority = sk->sk_priority;
1515 skb->mark = sk->sk_mark;
1517 skb->dst = dst_clone(&rt->u.dst);
1518 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1519 if (proto == IPPROTO_ICMPV6) {
1520 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1522 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1523 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1526 err = ip6_local_out(skb);
1527 if (err) {
1528 if (err > 0)
1529 err = np->recverr ? net_xmit_errno(err) : 0;
1530 if (err)
1531 goto error;
1534 out:
1535 ip6_cork_release(inet, np);
1536 return err;
1537 error:
1538 goto out;
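/*
 * ip6_flush_pending_frames: abort a corked send, discarding anything
 * still on the write queue and releasing the cork state.
 */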
1541 void ip6_flush_pending_frames(struct sock *sk)
1543 struct sk_buff *skb;
1545 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1546 if (skb->dst)
1547 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
1548 IPSTATS_MIB_OUTDISCARDS);
1549 kfree_skb(skb);
1552 ip6_cork_release(inet_sk(sk), inet6_sk(sk));