/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}
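
/*
 * A minimal userspace rendering of the ID selection above, for
 * illustration only (select_ident() is a hypothetical name; only the
 * arithmetic mirrors ipv6_select_ident()): the counter is handed out in
 * network byte order ("frag id should be in NBO", see the changelog)
 * and 0 is never used as an identification value.
 *
 *	#include <stdint.h>
 *	#include <arpa/inet.h>
 *
 *	static uint32_t select_ident(void)
 *	{
 *		static uint32_t id = 1;
 *		uint32_t wire = htonl(id);	// value placed on the wire
 *		if (++id == 0)			// wrap around, skipping 0
 *			id = 1;
 *		return wire;
 *	}
 */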
static inline int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
					newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}
int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb->dst);

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}
/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
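
/*
 * Worked example of the first-word construction used above. The top
 * nibble is the version (6), bits 27..20 carry the 8-bit traffic class,
 * and the low 20 bits carry the flow label, which the flowi already
 * stores in network byte order, so it can be OR-ed in directly. A
 * compilable userspace sketch (values arbitrary, names hypothetical):
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *	#include <arpa/inet.h>
 *
 *	int main(void)
 *	{
 *		uint32_t tclass = 0xb8;			// e.g. DSCP EF << 2
 *		uint32_t flowlabel = htonl(0x12345);	// 20-bit label in NBO
 *		uint32_t word = htonl(0x60000000 | (tclass << 20)) | flowlabel;
 *
 *		uint32_t host = ntohl(word);		// 0x6b812345
 *		assert((host >> 28) == 6);		// version
 *		assert(((host >> 20) & 0xff) == 0xb8);	// traffic class
 *		assert((host & 0xfffff) == 0x12345);	// flow label
 *		return 0;
 *	}
 */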
/*
 * To avoid extra problems ND packets are sent through this
 * routine. It's code duplication, but I really want to avoid
 * extra checks, since ipv6_build_header() is used by TCP (which
 * is performance critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       struct in6_addr *saddr, struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst) && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#ifdef CONFIG_IPV6_MIP6
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
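
/*
 * Example of the walk above over a synthetic header chain: the function
 * returns the length of the RFC 2460 "unfragmentable part" and leaves
 * *nexthdr pointing at the byte the callers overwrite with
 * NEXTHDR_FRAGMENT. A compilable userspace sketch (buffer and names are
 * hypothetical; constants mirror the kernel's):
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *
 *	#define NEXTHDR_HOP	0
 *	#define NEXTHDR_TCP	6
 *
 *	int main(void)
 *	{
 *		// 8-byte Hop-by-Hop header: nexthdr = TCP, hdrlen = 0
 *		uint8_t ext[8] = { NEXTHDR_TCP, 0 };
 *		unsigned offset = 40;		// sizeof(struct ipv6hdr)
 *		uint8_t first = NEXTHDR_HOP;	// IPv6 header's nexthdr
 *
 *		if (first == NEXTHDR_HOP)
 *			offset += (ext[1] + 1) * 8;	// ipv6_optlen()
 *		assert(offset == 48);	// fragment header goes at byte 48
 *		return 0;
 *	}
 */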
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
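
/*
 * Worked example of the offset encoding in the slow path above: because
 * every non-final fragment length is rounded down to a multiple of 8
 * (len &= ~7), the running byte offset equals the RFC 2460 "fragment
 * offset in 8-octet units" shifted left by 3, so a plain htons() stores
 * it correctly and the low three bits stay free for the M flag. A
 * compilable sketch (values arbitrary):
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *	#include <arpa/inet.h>
 *
 *	#define IP6_MF 0x0001
 *
 *	int main(void)
 *	{
 *		unsigned mtu = 1280 - 40 - 8;	// payload space per fragment
 *		unsigned len = mtu & ~7U;	// 1232, already 8-aligned
 *		uint16_t frag_off = htons(len) | htons(IP6_MF);
 *
 *		uint16_t host = ntohs(frag_off);
 *		assert((host >> 3) == len / 8);	// 8-octet units on the wire
 *		assert(host & IP6_MF);		// more-fragments flag set
 *		return 0;
 *	}
 */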
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we still may
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now,
	 *    (because the main consumer of this service
	 *    is TCP, which does not have this problem),
	 *    so the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
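
/*
 * Typical caller pattern, as a hedged sketch rather than a real call
 * site: datagram sendmsg paths fill a flowi and let the lookup both
 * route the packet and, via ipv6_get_saddr() in the tail function,
 * pick a source address when fl6_src was left unspecified. On error
 * the dst has already been released and *dst set to NULL.
 *
 *	struct flowi fl;
 *	struct dst_entry *dst = NULL;
 *	int err;
 *
 *	memset(&fl, 0, sizeof(fl));
 *	fl.proto = IPPROTO_UDP;
 *	ipv6_addr_copy(&fl.fl6_dst, daddr);	// daddr: chosen destination
 *	fl.oif = sk->sk_bound_dev_if;
 *
 *	err = ip6_dst_lookup(sk, &dst, &fl);
 *	if (err)
 *		return err;			// nothing to release
 */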
/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
	return err;
error:
	goto out;
}
void ip6_flush_pending_frames(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	inet->cork.flags &= ~IPCORK_OPT;

	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}
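
/*
 * ip6_append_data(), ip6_push_pending_frames() and
 * ip6_flush_pending_frames() together form the corking API used by the
 * UDPv6, raw and ICMPv6 transmit paths. A condensed sketch of the
 * contract (the error handling and cork bookkeeping of real callers
 * such as udp_v6_sendmsg() are omitted; variable names are illustrative):
 *
 *	lock_sock(sk);
 *	err = ip6_append_data(sk, getfrag, msg->msg_iov, len,
 *			      transhdrlen, hlimit, tclass, opt, fl,
 *			      (struct rt6_info *)dst, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);	// drop what was queued
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk); // build header and send
 *	release_sock(sk);
 */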