xen: suppress known wrmsrs
[linux-2.6.git] / net / ipv6 / ip6_output.c
blob6811901e6b1ec94c0e3084de16c7b8c408bb0503
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
44 #include <net/sock.h>
45 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 static u32 ipv6_fragmentation_id = 1;
63 static DEFINE_SPINLOCK(ip6_id_lock);
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
72 int __ip6_local_out(struct sk_buff *skb)
74 int len;
76 len = skb->len - sizeof(struct ipv6hdr);
77 if (len > IPV6_MAXPLEN)
78 len = 0;
79 ipv6_hdr(skb)->payload_len = htons(len);
81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82 dst_output);
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, hand the packet to dst_output().  Any other value from
 * __ip6_local_out() is returned unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (likely(rc == 1))
		return dst_output(skb);

	return rc;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
97 static int ip6_output_finish(struct sk_buff *skb)
99 struct dst_entry *dst = skb->dst;
101 if (dst->hh)
102 return neigh_hh_output(dst->hh, skb);
103 else if (dst->neighbour)
104 return dst->neighbour->output(skb);
106 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
107 kfree_skb(skb);
108 return -EINVAL;
112 /* dev_loopback_xmit for use with netfilter. */
113 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115 skb_reset_mac_header(newskb);
116 __skb_pull(newskb, skb_network_offset(newskb));
117 newskb->pkt_type = PACKET_LOOPBACK;
118 newskb->ip_summed = CHECKSUM_UNNECESSARY;
119 WARN_ON(!newskb->dst);
121 netif_rx(newskb);
122 return 0;
/*
 * ip6_output2 - set the output device/protocol on skb and push it through
 * the NF_INET_POST_ROUTING hook towards ip6_output_finish().
 *
 * For a multicast destination, a clone of the packet may additionally be
 * looped back through the same hook into ip6_dev_loopback_xmit() when the
 * device is not a loopback device, the socket (if any) has mc_loop set,
 * and either a multicast routing socket exists for a non-forwarded packet
 * or ipv6_chk_mcast_addr() matches.  A packet with hop limit 0 is counted
 * as OUTDISCARDS, freed, and 0 is returned.  Multicast packets that go on
 * are counted as OUTMCASTPKTS.
 *
 * NOTE(review): brace-only lines are missing from this listing, so the
 * exact nesting of the hop-limit check relative to the loopback
 * condition cannot be read off here; confirm against the real file.
 */
126 static int ip6_output2(struct sk_buff *skb)
128 struct dst_entry *dst = skb->dst;
129 struct net_device *dev = dst->dev;
131 skb->protocol = htons(ETH_P_IPV6);
132 skb->dev = dev;
134 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
135 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
136 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
138 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
139 ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr))) {
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
147 if (newskb)
148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149 NULL, newskb->dev,
150 ip6_dev_loopback_xmit);
152 if (ipv6_hdr(skb)->hop_limit == 0) {
153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
154 kfree_skb(skb);
155 return 0;
159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 skb->dst->dev->mtu : dst_mtu(skb->dst);
174 int ip6_output(struct sk_buff *skb)
176 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
177 if (unlikely(idev->cnf.disable_ipv6)) {
178 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
179 kfree_skb(skb);
180 return 0;
183 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
184 dst_allfrag(skb->dst))
185 return ip6_fragment(skb, ip6_output2);
186 else
187 return ip6_output2(skb);
/* xmit an sk_buff (used by TCP) */
/*
 * ip6_xmit - prepend the IPv6 header (and optional extension headers) to
 * skb and send it through the NF_INET_LOCAL_OUT hook to dst_output().
 *
 * @sk:       sending socket; supplies hop limit, traffic class, priority
 *            and mark
 * @skb:      packet whose data starts at the upper-layer header
 * @fl:       flow providing source/destination address, protocol and
 *            flow label
 * @opt:      extension headers to push, or NULL
 * @ipfragok: non-zero lets a packet larger than the route MTU go out
 *
 * When @opt is given and the headroom is too small, the skb is replaced
 * by a reallocated copy; failure of that reallocation is counted as
 * OUTDISCARDS and returns -ENOBUFS.  The hop limit falls back to
 * ip6_dst_hoplimit() and the traffic class to 0 when the socket value is
 * negative.  A packet that exceeds the MTU, is not GSO, and has no
 * @ipfragok gets an ICMPV6_PKT_TOOBIG sent back, is counted as
 * FRAGFAILS, freed, and -EMSGSIZE is returned.
 *
 * NOTE(review): sk is tested for NULL before skb_set_owner_w() but is
 * dereferenced unconditionally for sk_priority/sk_mark (and by
 * inet6_sk()); confirm whether a NULL sk can reach this function.
 */
194 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
195 struct ipv6_txoptions *opt, int ipfragok)
197 struct ipv6_pinfo *np = inet6_sk(sk);
198 struct in6_addr *first_hop = &fl->fl6_dst;
199 struct dst_entry *dst = skb->dst;
200 struct ipv6hdr *hdr;
201 u8 proto = fl->proto;
202 int seg_len = skb->len;
203 int hlimit, tclass;
204 u32 mtu;
206 if (opt) {
207 unsigned int head_room;
209 /* First: exthdrs may take lots of space (~8K for now)
210 MAX_HEADER is not enough.
212 head_room = opt->opt_nflen + opt->opt_flen;
213 seg_len += head_room;
214 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
216 if (skb_headroom(skb) < head_room) {
217 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
218 if (skb2 == NULL) {
219 IP6_INC_STATS(ip6_dst_idev(skb->dst),
220 IPSTATS_MIB_OUTDISCARDS);
221 kfree_skb(skb);
222 return -ENOBUFS;
224 kfree_skb(skb);
225 skb = skb2;
226 if (sk)
227 skb_set_owner_w(skb, sk);
229 if (opt->opt_flen)
230 ipv6_push_frag_opts(skb, opt, &proto);
231 if (opt->opt_nflen)
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
235 skb_push(skb, sizeof(struct ipv6hdr));
236 skb_reset_network_header(skb);
237 hdr = ipv6_hdr(skb);
240 * Fill in the IPv6 header
243 hlimit = -1;
244 if (np)
245 hlimit = np->hop_limit;
246 if (hlimit < 0)
247 hlimit = ip6_dst_hoplimit(dst);
249 tclass = -1;
250 if (np)
251 tclass = np->tclass;
252 if (tclass < 0)
253 tclass = 0;
255 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
257 hdr->payload_len = htons(seg_len);
258 hdr->nexthdr = proto;
259 hdr->hop_limit = hlimit;
261 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
262 ipv6_addr_copy(&hdr->daddr, first_hop);
264 skb->priority = sk->sk_priority;
265 skb->mark = sk->sk_mark;
267 mtu = dst_mtu(dst);
268 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
269 IP6_INC_STATS(ip6_dst_idev(skb->dst),
270 IPSTATS_MIB_OUTREQUESTS);
271 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
272 dst_output);
275 if (net_ratelimit())
276 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
277 skb->dev = dst->dev;
278 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
279 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
280 kfree_skb(skb);
281 return -EMSGSIZE;
284 EXPORT_SYMBOL(ip6_xmit);
287 * To avoid extra problems ND packets are send through this
288 * routine. It's code duplication but I really want to avoid
289 * extra checks since ipv6_build_header is used by TCP (which
290 * is for us performance critical)
293 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
294 const struct in6_addr *saddr, const struct in6_addr *daddr,
295 int proto, int len)
297 struct ipv6_pinfo *np = inet6_sk(sk);
298 struct ipv6hdr *hdr;
299 int totlen;
301 skb->protocol = htons(ETH_P_IPV6);
302 skb->dev = dev;
304 totlen = len + sizeof(struct ipv6hdr);
306 skb_reset_network_header(skb);
307 skb_put(skb, sizeof(struct ipv6hdr));
308 hdr = ipv6_hdr(skb);
310 *(__be32*)hdr = htonl(0x60000000);
312 hdr->payload_len = htons(len);
313 hdr->nexthdr = proto;
314 hdr->hop_limit = np->hop_limit;
316 ipv6_addr_copy(&hdr->saddr, saddr);
317 ipv6_addr_copy(&hdr->daddr, daddr);
319 return 0;
322 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
324 struct ip6_ra_chain *ra;
325 struct sock *last = NULL;
327 read_lock(&ip6_ra_lock);
328 for (ra = ip6_ra_chain; ra; ra = ra->next) {
329 struct sock *sk = ra->sk;
330 if (sk && ra->sel == sel &&
331 (!sk->sk_bound_dev_if ||
332 sk->sk_bound_dev_if == skb->dev->ifindex)) {
333 if (last) {
334 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
335 if (skb2)
336 rawv6_rcv(last, skb2);
338 last = sk;
342 if (last) {
343 rawv6_rcv(last, skb);
344 read_unlock(&ip6_ra_lock);
345 return 1;
347 read_unlock(&ip6_ra_lock);
348 return 0;
/*
 * ip6_forward_proxy_check - classify a packet addressed to a proxied
 * neighbour address.
 *
 * Skips extension headers to find the upper-layer protocol.  Returns
 *   1  when the packet is ICMPv6 with one of the listed neighbour
 *      discovery types, so the caller should pass it to the input path;
 *  -1  when the destination address is link-local (after calling
 *      dst_link_failure()), so the caller should discard it;
 *   0  otherwise, including when the extension-header walk fails or the
 *      ICMPv6 type byte cannot be pulled into the linear area.
 */
351 static int ip6_forward_proxy_check(struct sk_buff *skb)
353 struct ipv6hdr *hdr = ipv6_hdr(skb);
354 u8 nexthdr = hdr->nexthdr;
355 int offset;
357 if (ipv6_ext_hdr(nexthdr)) {
358 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
359 if (offset < 0)
360 return 0;
361 } else
362 offset = sizeof(struct ipv6hdr);
364 if (nexthdr == IPPROTO_ICMPV6) {
365 struct icmp6hdr *icmp6;
367 if (!pskb_may_pull(skb, (skb_network_header(skb) +
368 offset + 1 - skb->data)))
369 return 0;
371 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
373 switch (icmp6->icmp6_type) {
374 case NDISC_ROUTER_SOLICITATION:
375 case NDISC_ROUTER_ADVERTISEMENT:
376 case NDISC_NEIGHBOUR_SOLICITATION:
377 case NDISC_NEIGHBOUR_ADVERTISEMENT:
378 case NDISC_REDIRECT:
379 /* For reaction involving unicast neighbor discovery
380 * message destined to the proxied address, pass it to
381 * input function.
383 return 1;
384 default:
385 break;
390 * The proxying router can't forward traffic sent to a link-local
391 * address, so signal the sender and discard the packet. This
392 * behavior is clarified by the MIPv6 specification.
394 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
395 dst_link_failure(skb);
396 return -1;
399 return 0;
/*
 * Continuation run after the NF_INET_FORWARD hook accepts a packet:
 * simply hands the packet to the route's output function.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
/*
 * ip6_forward - forward a received IPv6 packet along its route.
 *
 * Steps visible in this function, in order:
 *  - refuse when forwarding is disabled for the namespace (error path),
 *    when skb_warn_if_lro() fires, or when the xfrm forward policy check
 *    fails (drop path);
 *  - hand router-alert packets to ip6_call_ra_chain(); return 0 if a
 *    socket took the packet;
 *  - hop limit <= 1: send ICMPV6_TIME_EXCEED, free, return -ETIMEDOUT;
 *  - with proxy_ndp enabled and a proxy neighbour entry for the
 *    destination, let ip6_forward_proxy_check() decide between local
 *    input, drop, or normal forwarding;
 *  - xfrm6_route_forward() may replace skb->dst, so dst is reloaded;
 *  - send a redirect when the packet leaves on the device it came in on,
 *    otherwise validate the source address type;
 *  - larger than the route MTU: send ICMPV6_PKT_TOOBIG, free, return
 *    -EMSGSIZE;
 *  - make the header writable with skb_cow(), decrement the hop limit
 *    and pass the packet through NF_INET_FORWARD to ip6_forward_finish().
 *
 * The "error" label counts INADDRERRORS and falls into "drop", which
 * frees the skb and returns -EINVAL.
 */
407 int ip6_forward(struct sk_buff *skb)
409 struct dst_entry *dst = skb->dst;
410 struct ipv6hdr *hdr = ipv6_hdr(skb);
411 struct inet6_skb_parm *opt = IP6CB(skb);
412 struct net *net = dev_net(dst->dev);
414 if (net->ipv6.devconf_all->forwarding == 0)
415 goto error;
417 if (skb_warn_if_lro(skb))
418 goto drop;
420 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
421 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
422 goto drop;
425 skb_forward_csum(skb);
428 * We DO NOT make any processing on
429 * RA packets, pushing them to user level AS IS
430 * without ane WARRANTY that application will be able
431 * to interpret them. The reason is that we
432 * cannot make anything clever here.
434 * We are not end-node, so that if packet contains
435 * AH/ESP, we cannot make anything.
436 * Defragmentation also would be mistake, RA packets
437 * cannot be fragmented, because there is no warranty
438 * that different fragments will go along one path. --ANK
440 if (opt->ra) {
441 u8 *ptr = skb_network_header(skb) + opt->ra;
442 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
443 return 0;
447 * check and decrement ttl
449 if (hdr->hop_limit <= 1) {
450 /* Force OUTPUT device used as source address */
451 skb->dev = dst->dev;
452 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
453 0, skb->dev);
454 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
456 kfree_skb(skb);
457 return -ETIMEDOUT;
460 /* XXX: idev->cnf.proxy_ndp? */
461 if (net->ipv6.devconf_all->proxy_ndp &&
462 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
463 int proxied = ip6_forward_proxy_check(skb);
464 if (proxied > 0)
465 return ip6_input(skb);
466 else if (proxied < 0) {
467 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
468 goto drop;
472 if (!xfrm6_route_forward(skb)) {
473 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
474 goto drop;
476 dst = skb->dst;
478 /* IPv6 specs say nothing about it, but it is clear that we cannot
479 send redirects to source routed frames.
480 We don't send redirects to frames decapsulated from IPsec.
482 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
483 !skb->sp) {
484 struct in6_addr *target = NULL;
485 struct rt6_info *rt;
486 struct neighbour *n = dst->neighbour;
489 * incoming and outgoing devices are the same
490 * send a redirect.
493 rt = (struct rt6_info *) dst;
494 if ((rt->rt6i_flags & RTF_GATEWAY))
495 target = (struct in6_addr*)&n->primary_key;
496 else
497 target = &hdr->daddr;
499 /* Limit redirects both by destination (here)
500 and by source (inside ndisc_send_redirect)
502 if (xrlim_allow(dst, 1*HZ))
503 ndisc_send_redirect(skb, n, target);
504 } else {
505 int addrtype = ipv6_addr_type(&hdr->saddr);
507 /* This check is security critical. */
508 if (addrtype == IPV6_ADDR_ANY ||
509 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
510 goto error;
511 if (addrtype & IPV6_ADDR_LINKLOCAL) {
512 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
513 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
514 goto error;
518 if (skb->len > dst_mtu(dst)) {
519 /* Again, force OUTPUT device used as source address */
520 skb->dev = dst->dev;
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
522 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
523 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
524 kfree_skb(skb);
525 return -EMSGSIZE;
528 if (skb_cow(skb, dst->dev->hard_header_len)) {
529 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
530 goto drop;
533 hdr = ipv6_hdr(skb);
535 /* Mangling hops number delayed to point after skb COW */
537 hdr->hop_limit--;
539 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
540 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
541 ip6_forward_finish);
543 error:
544 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
545 drop:
546 kfree_skb(skb);
547 return -EINVAL;
550 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
552 to->pkt_type = from->pkt_type;
553 to->priority = from->priority;
554 to->protocol = from->protocol;
555 dst_release(to->dst);
556 to->dst = dst_clone(from->dst);
557 to->dev = from->dev;
558 to->mark = from->mark;
560 #ifdef CONFIG_NET_SCHED
561 to->tc_index = from->tc_index;
562 #endif
563 nf_copy(to, from);
564 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
565 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
566 to->nf_trace = from->nf_trace;
567 #endif
568 skb_copy_secmark(to, from);
/*
 * ip6_find_1stfragopt - find where a fragment header has to be inserted.
 *
 * Walks the extension headers that follow the fixed IPv6 header.
 * Hop-by-hop and routing headers are always stepped over; a destination
 * options header is stepped over unless a routing header has already
 * been seen (with MIP6 configured, one containing a Home Address option
 * is always stepped over).  The walk stops at any other header type or
 * when the offset reaches the end of the data counted from the network
 * header.
 *
 * Returns the byte offset from the network header at which the walk
 * stopped.  *nexthdr is left pointing at the "next header" byte of the
 * last header stepped over (initially the one in the IPv6 header).
 */
571 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
573 u16 offset = sizeof(struct ipv6hdr);
574 struct ipv6_opt_hdr *exthdr =
575 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
576 unsigned int packet_len = skb->tail - skb->network_header;
577 int found_rhdr = 0;
578 *nexthdr = &ipv6_hdr(skb)->nexthdr;
580 while (offset + 1 <= packet_len) {
582 switch (**nexthdr) {
584 case NEXTHDR_HOP:
585 break;
586 case NEXTHDR_ROUTING:
587 found_rhdr = 1;
588 break;
589 case NEXTHDR_DEST:
590 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
591 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
592 break;
593 #endif
594 if (found_rhdr)
595 return offset;
596 break;
597 default :
598 return offset;
601 offset += ipv6_optlen(exthdr);
602 *nexthdr = &exthdr->nexthdr;
603 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
604 offset);
607 return offset;
/*
 * ip6_fragment - split skb into IPv6 fragments and send each one through
 * @output.
 *
 * @skb:    packet to fragment; consumed on the paths that end in
 *          kfree_skb() or hand it to @output
 * @output: transmit function called once per fragment
 *
 * The unfragmentable part length (hlen) comes from
 * ip6_find_1stfragopt().  When skb->local_df is not set the packet is
 * not fragmented: ICMPV6_PKT_TOOBIG is sent, FRAGFAILS is counted, the
 * skb is freed and -EMSGSIZE is returned.  The per-fragment payload
 * budget is the path MTU (or the socket's smaller non-zero frag_size)
 * minus hlen and the fragment header.
 *
 * Two strategies follow:
 *  - fast path: when the skb already carries a frag_list whose pieces
 *    have suitable sizes, 8-byte alignment and headroom and are not
 *    shared or cloned, each piece gets the saved header plus a fragment
 *    header prepended and is sent as is;
 *  - slow_path: otherwise new skbs are allocated and the payload is
 *    copied into them in chunks of at most the budget, every chunk but
 *    the last rounded down to a multiple of 8 bytes.
 *
 * All fragments share one identification from ipv6_select_ident().
 * Returns 0 on success, otherwise the error from allocation or @output.
 *
 * NOTE(review): on the fast path, the kmemdup() failure branch returns
 * -ENOMEM after frag_list has already been detached from skb (and after
 * sock_hold() was taken for each fragment), and no kfree_skb() is
 * visible on that branch, unlike every other failure exit.  This looks
 * like a leak of skb and the detached fragments; confirm and fix.
 */
610 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
612 struct net_device *dev;
613 struct sk_buff *frag;
614 struct rt6_info *rt = (struct rt6_info*)skb->dst;
615 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
616 struct ipv6hdr *tmp_hdr;
617 struct frag_hdr *fh;
618 unsigned int mtu, hlen, left, len;
619 __be32 frag_id = 0;
620 int ptr, offset = 0, err=0;
621 u8 *prevhdr, nexthdr = 0;
623 dev = rt->u.dst.dev;
624 hlen = ip6_find_1stfragopt(skb, &prevhdr);
625 nexthdr = *prevhdr;
627 mtu = ip6_skb_dst_mtu(skb);
629 /* We must not fragment if the socket is set to force MTU discovery
630 * or if the skb it not generated by a local socket. (This last
631 * check should be redundant, but it's free.)
633 if (!skb->local_df) {
634 skb->dev = skb->dst->dev;
635 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
636 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
637 kfree_skb(skb);
638 return -EMSGSIZE;
641 if (np && np->frag_size < mtu) {
642 if (np->frag_size)
643 mtu = np->frag_size;
645 mtu -= hlen + sizeof(struct frag_hdr);
647 if (skb_shinfo(skb)->frag_list) {
648 int first_len = skb_pagelen(skb);
649 int truesizes = 0;
651 if (first_len - hlen > mtu ||
652 ((first_len - hlen) & 7) ||
653 skb_cloned(skb))
654 goto slow_path;
656 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
657 /* Correct geometry. */
658 if (frag->len > mtu ||
659 ((frag->len & 7) && frag->next) ||
660 skb_headroom(frag) < hlen)
661 goto slow_path;
663 /* Partially cloned skb? */
664 if (skb_shared(frag))
665 goto slow_path;
667 BUG_ON(frag->sk);
668 if (skb->sk) {
669 sock_hold(skb->sk);
670 frag->sk = skb->sk;
671 frag->destructor = sock_wfree;
672 truesizes += frag->truesize;
676 err = 0;
677 offset = 0;
678 frag = skb_shinfo(skb)->frag_list;
679 skb_shinfo(skb)->frag_list = NULL;
680 /* BUILD HEADER */
682 *prevhdr = NEXTHDR_FRAGMENT;
683 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
684 if (!tmp_hdr) {
685 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
686 return -ENOMEM;
689 __skb_pull(skb, hlen);
690 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
691 __skb_push(skb, hlen);
692 skb_reset_network_header(skb);
693 memcpy(skb_network_header(skb), tmp_hdr, hlen);
695 ipv6_select_ident(skb, fh);
696 fh->nexthdr = nexthdr;
697 fh->reserved = 0;
698 fh->frag_off = htons(IP6_MF);
699 frag_id = fh->identification;
701 first_len = skb_pagelen(skb);
702 skb->data_len = first_len - skb_headlen(skb);
703 skb->truesize -= truesizes;
704 skb->len = first_len;
705 ipv6_hdr(skb)->payload_len = htons(first_len -
706 sizeof(struct ipv6hdr));
708 dst_hold(&rt->u.dst);
710 for (;;) {
711 /* Prepare header of the next frame,
712 * before previous one went down. */
713 if (frag) {
714 frag->ip_summed = CHECKSUM_NONE;
715 skb_reset_transport_header(frag);
716 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
717 __skb_push(frag, hlen);
718 skb_reset_network_header(frag);
719 memcpy(skb_network_header(frag), tmp_hdr,
720 hlen);
721 offset += skb->len - hlen - sizeof(struct frag_hdr);
722 fh->nexthdr = nexthdr;
723 fh->reserved = 0;
724 fh->frag_off = htons(offset);
725 if (frag->next != NULL)
726 fh->frag_off |= htons(IP6_MF);
727 fh->identification = frag_id;
728 ipv6_hdr(frag)->payload_len =
729 htons(frag->len -
730 sizeof(struct ipv6hdr));
731 ip6_copy_metadata(frag, skb);
734 err = output(skb);
735 if(!err)
736 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
738 if (err || !frag)
739 break;
741 skb = frag;
742 frag = skb->next;
743 skb->next = NULL;
746 kfree(tmp_hdr);
748 if (err == 0) {
749 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
750 dst_release(&rt->u.dst);
751 return 0;
754 while (frag) {
755 skb = frag->next;
756 kfree_skb(frag);
757 frag = skb;
760 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
761 dst_release(&rt->u.dst);
762 return err;
765 slow_path:
766 left = skb->len - hlen; /* Space per frame */
767 ptr = hlen; /* Where to start from */
770 * Fragment the datagram.
773 *prevhdr = NEXTHDR_FRAGMENT;
776 * Keep copying data until we run out.
778 while(left > 0) {
779 len = left;
780 /* IF: it doesn't fit, use 'mtu' - the data space left */
781 if (len > mtu)
782 len = mtu;
783 /* IF: we are not sending upto and including the packet end
784 then align the next start on an eight byte boundary */
785 if (len < left) {
786 len &= ~7;
789 * Allocate buffer.
792 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
793 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
794 IP6_INC_STATS(ip6_dst_idev(skb->dst),
795 IPSTATS_MIB_FRAGFAILS);
796 err = -ENOMEM;
797 goto fail;
801 * Set up data on packet
804 ip6_copy_metadata(frag, skb);
805 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
806 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
807 skb_reset_network_header(frag);
808 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
809 frag->transport_header = (frag->network_header + hlen +
810 sizeof(struct frag_hdr));
813 * Charge the memory for the fragment to any owner
814 * it might possess
816 if (skb->sk)
817 skb_set_owner_w(frag, skb->sk);
820 * Copy the packet header into the new buffer.
822 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
825 * Build fragment header.
827 fh->nexthdr = nexthdr;
828 fh->reserved = 0;
829 if (!frag_id) {
830 ipv6_select_ident(skb, fh);
831 frag_id = fh->identification;
832 } else
833 fh->identification = frag_id;
836 * Copy a block of the IP datagram.
838 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
839 BUG();
840 left -= len;
842 fh->frag_off = htons(offset);
843 if (left > 0)
844 fh->frag_off |= htons(IP6_MF);
845 ipv6_hdr(frag)->payload_len = htons(frag->len -
846 sizeof(struct ipv6hdr));
848 ptr += len;
849 offset += len;
852 * Put this fragment into the sending queue.
854 err = output(frag);
855 if (err)
856 goto fail;
858 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
860 IP6_INC_STATS(ip6_dst_idev(skb->dst),
861 IPSTATS_MIB_FRAGOKS);
862 kfree_skb(skb);
863 return err;
865 fail:
866 IP6_INC_STATS(ip6_dst_idev(skb->dst),
867 IPSTATS_MIB_FRAGFAILS);
868 kfree_skb(skb);
869 return err;
872 static inline int ip6_rt_check(struct rt6key *rt_key,
873 struct in6_addr *fl_addr,
874 struct in6_addr *addr_cache)
876 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
880 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst,
882 struct flowi *fl)
884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst;
887 if (!dst)
888 goto out;
890 /* Yes, checking route validity in not connected
891 * case is not very simple. Take into account,
892 * that we do not support routing by source, TOS,
893 * and MSG_DONTROUTE --ANK (980726)
895 * 1. ip6_rt_check(): If route was host route,
896 * check that cached destination is current.
897 * If it is network route, we still may
898 * check its validity using saved pointer
899 * to the last used address: daddr_cache.
900 * We do not want to save whole address now,
901 * (because main consumer of this service
902 * is tcp, which has not this problem),
903 * so that the last trick works only on connected
904 * sockets.
905 * 2. oif also should be the same.
907 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
908 #ifdef CONFIG_IPV6_SUBTREES
909 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
910 #endif
911 (fl->oif && fl->oif != dst->dev->ifindex)) {
912 dst_release(dst);
913 dst = NULL;
916 out:
917 return dst;
920 static int ip6_dst_lookup_tail(struct sock *sk,
921 struct dst_entry **dst, struct flowi *fl)
923 int err;
924 struct net *net = sock_net(sk);
926 if (*dst == NULL)
927 *dst = ip6_route_output(net, sk, fl);
929 if ((err = (*dst)->error))
930 goto out_err_release;
932 if (ipv6_addr_any(&fl->fl6_src)) {
933 err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
934 &fl->fl6_dst,
935 sk ? inet6_sk(sk)->srcprefs : 0,
936 &fl->fl6_src);
937 if (err)
938 goto out_err_release;
941 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
943 * Here if the dst entry we've looked up
944 * has a neighbour entry that is in the INCOMPLETE
945 * state and the src address from the flow is
946 * marked as OPTIMISTIC, we release the found
947 * dst entry and replace it instead with the
948 * dst entry of the nexthop router
950 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
951 struct inet6_ifaddr *ifp;
952 struct flowi fl_gw;
953 int redirect;
955 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
956 (*dst)->dev, 1);
958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
959 if (ifp)
960 in6_ifa_put(ifp);
962 if (redirect) {
964 * We need to get the dst entry for the
965 * default router instead
967 dst_release(*dst);
968 memcpy(&fl_gw, fl, sizeof(struct flowi));
969 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
970 *dst = ip6_route_output(net, sk, &fl_gw);
971 if ((err = (*dst)->error))
972 goto out_err_release;
975 #endif
977 return 0;
979 out_err_release:
980 if (err == -ENETUNREACH)
981 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
982 dst_release(*dst);
983 *dst = NULL;
984 return err;
988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result
991 * @fl: flow to lookup
993 * This function performs a route lookup on the given flow.
995 * It returns zero on success, or a standard errno code on error.
997 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
999 *dst = NULL;
1000 return ip6_dst_lookup_tail(sk, dst, fl);
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1005 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1006 * @sk: socket which provides the dst cache and route info
1007 * @dst: pointer to dst_entry * for result
1008 * @fl: flow to lookup
1010 * This function performs a route lookup on the given flow with the
1011 * possibility of using the cached route in the socket if it is valid.
1012 * It will take the socket dst lock when operating on the dst cache.
1013 * As a result, this function can only be used in process context.
1015 * It returns zero on success, or a standard errno code on error.
1017 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1019 *dst = NULL;
1020 if (sk) {
1021 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1022 *dst = ip6_sk_dst_check(sk, *dst, fl);
1025 return ip6_dst_lookup_tail(sk, dst, fl);
1027 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1029 static inline int ip6_ufo_append_data(struct sock *sk,
1030 int getfrag(void *from, char *to, int offset, int len,
1031 int odd, struct sk_buff *skb),
1032 void *from, int length, int hh_len, int fragheaderlen,
1033 int transhdrlen, int mtu,unsigned int flags)
1036 struct sk_buff *skb;
1037 int err;
1039 /* There is support for UDP large send offload by network
1040 * device, so create one single skb packet containing complete
1041 * udp datagram
1043 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1044 skb = sock_alloc_send_skb(sk,
1045 hh_len + fragheaderlen + transhdrlen + 20,
1046 (flags & MSG_DONTWAIT), &err);
1047 if (skb == NULL)
1048 return -ENOMEM;
1050 /* reserve space for Hardware header */
1051 skb_reserve(skb, hh_len);
1053 /* create space for UDP/IP header */
1054 skb_put(skb,fragheaderlen + transhdrlen);
1056 /* initialize network header pointer */
1057 skb_reset_network_header(skb);
1059 /* initialize protocol header pointer */
1060 skb->transport_header = skb->network_header + fragheaderlen;
1062 skb->ip_summed = CHECKSUM_PARTIAL;
1063 skb->csum = 0;
1064 sk->sk_sndmsg_off = 0;
1067 err = skb_append_datato_frags(sk,skb, getfrag, from,
1068 (length - transhdrlen));
1069 if (!err) {
1070 struct frag_hdr fhdr;
1072 /* specify the length of each IP datagram fragment*/
1073 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1074 sizeof(struct frag_hdr);
1075 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1076 ipv6_select_ident(skb, &fhdr);
1077 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1078 __skb_queue_tail(&sk->sk_write_queue, skb);
1080 return 0;
1082 /* There is not enough support do UPD LSO,
1083 * so follow normal path
1085 kfree_skb(skb);
1087 return err;
1090 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1091 int offset, int len, int odd, struct sk_buff *skb),
1092 void *from, int length, int transhdrlen,
1093 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1094 struct rt6_info *rt, unsigned int flags)
1096 struct inet_sock *inet = inet_sk(sk);
1097 struct ipv6_pinfo *np = inet6_sk(sk);
1098 struct sk_buff *skb;
1099 unsigned int maxfraglen, fragheaderlen;
1100 int exthdrlen;
1101 int hh_len;
1102 int mtu;
1103 int copy;
1104 int err;
1105 int offset = 0;
1106 int csummode = CHECKSUM_NONE;
1108 if (flags&MSG_PROBE)
1109 return 0;
1110 if (skb_queue_empty(&sk->sk_write_queue)) {
1112 * setup for corking
1114 if (opt) {
1115 if (np->cork.opt == NULL) {
1116 np->cork.opt = kmalloc(opt->tot_len,
1117 sk->sk_allocation);
1118 if (unlikely(np->cork.opt == NULL))
1119 return -ENOBUFS;
1120 } else if (np->cork.opt->tot_len < opt->tot_len) {
1121 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1122 return -EINVAL;
1124 memcpy(np->cork.opt, opt, opt->tot_len);
1125 inet->cork.flags |= IPCORK_OPT;
1126 /* need source address above miyazawa*/
1128 dst_hold(&rt->u.dst);
1129 inet->cork.dst = &rt->u.dst;
1130 inet->cork.fl = *fl;
1131 np->cork.hop_limit = hlimit;
1132 np->cork.tclass = tclass;
1133 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1134 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1135 if (np->frag_size < mtu) {
1136 if (np->frag_size)
1137 mtu = np->frag_size;
1139 inet->cork.fragsize = mtu;
1140 if (dst_allfrag(rt->u.dst.path))
1141 inet->cork.flags |= IPCORK_ALLFRAG;
1142 inet->cork.length = 0;
1143 sk->sk_sndmsg_page = NULL;
1144 sk->sk_sndmsg_off = 0;
1145 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1146 rt->rt6i_nfheader_len;
1147 length += exthdrlen;
1148 transhdrlen += exthdrlen;
1149 } else {
1150 rt = (struct rt6_info *)inet->cork.dst;
1151 fl = &inet->cork.fl;
1152 if (inet->cork.flags & IPCORK_OPT)
1153 opt = np->cork.opt;
1154 transhdrlen = 0;
1155 exthdrlen = 0;
1156 mtu = inet->cork.fragsize;
1159 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1161 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1162 (opt ? opt->opt_nflen : 0);
1163 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1165 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1166 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1167 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1168 return -EMSGSIZE;
1173 * Let's try using as much space as possible.
1174 * Use MTU if total length of the message fits into the MTU.
1175 * Otherwise, we need to reserve fragment header and
1176 * fragment alignment (= 8-15 octects, in total).
1178 * Note that we may need to "move" the data from the tail of
1179 * of the buffer to the new fragment when we split
1180 * the message.
1182 * FIXME: It may be fragmented into multiple chunks
1183 * at once if non-fragmentable extension headers
1184 * are too large.
1185 * --yoshfuji
1188 inet->cork.length += length;
1189 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1190 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1192 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1193 fragheaderlen, transhdrlen, mtu,
1194 flags);
1195 if (err)
1196 goto error;
1197 return 0;
1200 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1201 goto alloc_new_skb;
1203 while (length > 0) {
1204 /* Check if the remaining data fits into current packet. */
1205 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1206 if (copy < length)
1207 copy = maxfraglen - skb->len;
1209 if (copy <= 0) {
1210 char *data;
1211 unsigned int datalen;
1212 unsigned int fraglen;
1213 unsigned int fraggap;
1214 unsigned int alloclen;
1215 struct sk_buff *skb_prev;
1216 alloc_new_skb:
1217 skb_prev = skb;
1219 /* There's no room in the current skb */
1220 if (skb_prev)
1221 fraggap = skb_prev->len - maxfraglen;
1222 else
1223 fraggap = 0;
1226 * If remaining data exceeds the mtu,
1227 * we know we need more fragment(s).
1229 datalen = length + fraggap;
1230 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1231 datalen = maxfraglen - fragheaderlen;
1233 fraglen = datalen + fragheaderlen;
1234 if ((flags & MSG_MORE) &&
1235 !(rt->u.dst.dev->features&NETIF_F_SG))
1236 alloclen = mtu;
1237 else
1238 alloclen = datalen + fragheaderlen;
1241 * The last fragment gets additional space at tail.
1242 * Note: we overallocate on fragments with MSG_MORE
1243 * because we have no idea if we're the last one.
1245 if (datalen == length + fraggap)
1246 alloclen += rt->u.dst.trailer_len;
1249 * We just reserve space for fragment header.
1250 * Note: this may be overallocation if the message
1251 * (without MSG_MORE) fits into the MTU.
1253 alloclen += sizeof(struct frag_hdr);
1255 if (transhdrlen) {
1256 skb = sock_alloc_send_skb(sk,
1257 alloclen + hh_len,
1258 (flags & MSG_DONTWAIT), &err);
1259 } else {
1260 skb = NULL;
1261 if (atomic_read(&sk->sk_wmem_alloc) <=
1262 2 * sk->sk_sndbuf)
1263 skb = sock_wmalloc(sk,
1264 alloclen + hh_len, 1,
1265 sk->sk_allocation);
1266 if (unlikely(skb == NULL))
1267 err = -ENOBUFS;
1269 if (skb == NULL)
1270 goto error;
1272 * Fill in the control structures
1274 skb->ip_summed = csummode;
1275 skb->csum = 0;
1276 /* reserve for fragmentation */
1277 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1280 * Find where to start putting bytes
1282 data = skb_put(skb, fraglen);
1283 skb_set_network_header(skb, exthdrlen);
1284 data += fragheaderlen;
1285 skb->transport_header = (skb->network_header +
1286 fragheaderlen);
1287 if (fraggap) {
1288 skb->csum = skb_copy_and_csum_bits(
1289 skb_prev, maxfraglen,
1290 data + transhdrlen, fraggap, 0);
1291 skb_prev->csum = csum_sub(skb_prev->csum,
1292 skb->csum);
1293 data += fraggap;
1294 pskb_trim_unique(skb_prev, maxfraglen);
1296 copy = datalen - transhdrlen - fraggap;
1297 if (copy < 0) {
1298 err = -EINVAL;
1299 kfree_skb(skb);
1300 goto error;
1301 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1302 err = -EFAULT;
1303 kfree_skb(skb);
1304 goto error;
1307 offset += copy;
1308 length -= datalen - fraggap;
1309 transhdrlen = 0;
1310 exthdrlen = 0;
1311 csummode = CHECKSUM_NONE;
1314 * Put the packet on the pending queue
1316 __skb_queue_tail(&sk->sk_write_queue, skb);
1317 continue;
1320 if (copy > length)
1321 copy = length;
1323 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1324 unsigned int off;
1326 off = skb->len;
1327 if (getfrag(from, skb_put(skb, copy),
1328 offset, copy, off, skb) < 0) {
1329 __skb_trim(skb, off);
1330 err = -EFAULT;
1331 goto error;
1333 } else {
1334 int i = skb_shinfo(skb)->nr_frags;
1335 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1336 struct page *page = sk->sk_sndmsg_page;
1337 int off = sk->sk_sndmsg_off;
1338 unsigned int left;
1340 if (page && (left = PAGE_SIZE - off) > 0) {
1341 if (copy >= left)
1342 copy = left;
1343 if (page != frag->page) {
1344 if (i == MAX_SKB_FRAGS) {
1345 err = -EMSGSIZE;
1346 goto error;
1348 get_page(page);
1349 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1350 frag = &skb_shinfo(skb)->frags[i];
1352 } else if(i < MAX_SKB_FRAGS) {
1353 if (copy > PAGE_SIZE)
1354 copy = PAGE_SIZE;
1355 page = alloc_pages(sk->sk_allocation, 0);
1356 if (page == NULL) {
1357 err = -ENOMEM;
1358 goto error;
1360 sk->sk_sndmsg_page = page;
1361 sk->sk_sndmsg_off = 0;
1363 skb_fill_page_desc(skb, i, page, 0, 0);
1364 frag = &skb_shinfo(skb)->frags[i];
1365 } else {
1366 err = -EMSGSIZE;
1367 goto error;
1369 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1370 err = -EFAULT;
1371 goto error;
1373 sk->sk_sndmsg_off += copy;
1374 frag->size += copy;
1375 skb->len += copy;
1376 skb->data_len += copy;
1377 skb->truesize += copy;
1378 atomic_add(copy, &sk->sk_wmem_alloc);
1380 offset += copy;
1381 length -= copy;
1383 return 0;
1384 error:
1385 inet->cork.length -= length;
1386 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1387 return err;
1390 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1392 inet->cork.flags &= ~IPCORK_OPT;
1393 kfree(np->cork.opt);
1394 np->cork.opt = NULL;
1395 if (inet->cork.dst) {
1396 dst_release(inet->cork.dst);
1397 inet->cork.dst = NULL;
1398 inet->cork.flags &= ~IPCORK_ALLFRAG;
1400 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1403 int ip6_push_pending_frames(struct sock *sk)
1405 struct sk_buff *skb, *tmp_skb;
1406 struct sk_buff **tail_skb;
1407 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1408 struct inet_sock *inet = inet_sk(sk);
1409 struct ipv6_pinfo *np = inet6_sk(sk);
1410 struct ipv6hdr *hdr;
1411 struct ipv6_txoptions *opt = np->cork.opt;
1412 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1413 struct flowi *fl = &inet->cork.fl;
1414 unsigned char proto = fl->proto;
1415 int err = 0;
1417 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1418 goto out;
1419 tail_skb = &(skb_shinfo(skb)->frag_list);
1421 /* move skb->data to ip header from ext header */
1422 if (skb->data < skb_network_header(skb))
1423 __skb_pull(skb, skb_network_offset(skb));
1424 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1425 __skb_pull(tmp_skb, skb_network_header_len(skb));
1426 *tail_skb = tmp_skb;
1427 tail_skb = &(tmp_skb->next);
1428 skb->len += tmp_skb->len;
1429 skb->data_len += tmp_skb->len;
1430 skb->truesize += tmp_skb->truesize;
1431 __sock_put(tmp_skb->sk);
1432 tmp_skb->destructor = NULL;
1433 tmp_skb->sk = NULL;
1436 /* Allow local fragmentation. */
1437 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1438 skb->local_df = 1;
1440 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1441 __skb_pull(skb, skb_network_header_len(skb));
1442 if (opt && opt->opt_flen)
1443 ipv6_push_frag_opts(skb, opt, &proto);
1444 if (opt && opt->opt_nflen)
1445 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1447 skb_push(skb, sizeof(struct ipv6hdr));
1448 skb_reset_network_header(skb);
1449 hdr = ipv6_hdr(skb);
1451 *(__be32*)hdr = fl->fl6_flowlabel |
1452 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1454 hdr->hop_limit = np->cork.hop_limit;
1455 hdr->nexthdr = proto;
1456 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1457 ipv6_addr_copy(&hdr->daddr, final_dst);
1459 skb->priority = sk->sk_priority;
1460 skb->mark = sk->sk_mark;
1462 skb->dst = dst_clone(&rt->u.dst);
1463 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1464 if (proto == IPPROTO_ICMPV6) {
1465 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1467 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1468 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1471 err = ip6_local_out(skb);
1472 if (err) {
1473 if (err > 0)
1474 err = np->recverr ? net_xmit_errno(err) : 0;
1475 if (err)
1476 goto error;
1479 out:
1480 ip6_cork_release(inet, np);
1481 return err;
1482 error:
1483 goto out;
1486 void ip6_flush_pending_frames(struct sock *sk)
1488 struct sk_buff *skb;
1490 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1491 if (skb->dst)
1492 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1493 IPSTATS_MIB_OUTDISCARDS);
1494 kfree_skb(skb);
1497 ip6_cork_release(inet_sk(sk), inet6_sk(sk));