net/ipv6/ip6_flowlabel.c

   1 /*
   2  *      ip6_flowlabel.c         IPv6 flowlabel manager.
   3  *
   4  *      This program is free software; you can redistribute it and/or
   5  *      modify it under the terms of the GNU General Public License
   6  *      as published by the Free Software Foundation; either version
   7  *      2 of the License, or (at your option) any later version.
   8  *
   9  *      Authors:        Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  */
  11
  12 #include <linux/capability.h>
  13 #include <linux/errno.h>
  14 #include <linux/types.h>
  15 #include <linux/socket.h>
  16 #include <linux/net.h>
  17 #include <linux/netdevice.h>
  18 #include <linux/if_arp.h>
  19 #include <linux/in6.h>
  20 #include <linux/route.h>
  21 #include <linux/proc_fs.h>
  22 #include <linux/seq_file.h>
  23 #include <linux/slab.h>
  24 #include <linux/export.h>
  25 #include <linux/pid_namespace.h>
  26
  27 #include <net/net_namespace.h>
  28 #include <net/sock.h>
  29
  30 #include <net/ipv6.h>
  31 #include <net/ndisc.h>
  32 #include <net/protocol.h>
  33 #include <net/ip6_route.h>
  34 #include <net/addrconf.h>
  35 #include <net/rawv6.h>
  36 #include <net/icmp.h>
  37 #include <net/transp_v6.h>
  38
  39 #include <asm/uaccess.h>
  40
  41 #define FL_MIN_LINGER   6       /* Minimal linger. It is set to 6sec specified
  42                                    in old IPv6 RFC. Well, it was reasonable value.
  43                                  */
  44 #define FL_MAX_LINGER   60      /* Maximal linger timeout */
  45
  46 /* FL hash table */
  47
  48 #define FL_MAX_PER_SOCK 32
  49 #define FL_MAX_SIZE     4096
  50 #define FL_HASH_MASK    255
  51 #define FL_HASH(l)      (ntohl(l)&FL_HASH_MASK)
  52
  53 static atomic_t fl_size = ATOMIC_INIT(0);
  54 static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
  55
  56 static void ip6_fl_gc(unsigned long dummy);
  57 static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
  58
  59 /* FL hash table lock: it protects only of GC */
  60
  61 static DEFINE_RWLOCK(ip6_fl_lock);
  62
  63 /* Big socket sock */
  64
  65 static DEFINE_RWLOCK(ip6_sk_fl_lock);
  66
  67
  68 static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
  69 {
  70         struct ip6_flowlabel *fl;
  71
  72         for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
  73                 if (fl->label == label && net_eq(fl->fl_net, net))
  74                         return fl;
  75         }
  76         return NULL;
  77 }
  78
  79 static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
  80 {
  81         struct ip6_flowlabel *fl;
  82
  83         read_lock_bh(&ip6_fl_lock);
  84         fl = __fl_lookup(net, label);
  85         if (fl)
  86                 atomic_inc(&fl->users);
  87         read_unlock_bh(&ip6_fl_lock);
  88         return fl;
  89 }
  90
  91
  92 static void fl_free(struct ip6_flowlabel *fl)
  93 {
  94         if (fl) {
  95                 if (fl->share == IPV6_FL_S_PROCESS)
  96                         put_pid(fl->owner.pid);
  97                 release_net(fl->fl_net);
  98                 kfree(fl->opt);
  99         }
 100         kfree(fl);
 101 }
 102
 103 static void fl_release(struct ip6_flowlabel *fl)
 104 {
 105         write_lock_bh(&ip6_fl_lock);
 106
 107         fl->lastuse = jiffies;
 108         if (atomic_dec_and_test(&fl->users)) {
 109                 unsigned long ttd = fl->lastuse + fl->linger;
 110                 if (time_after(ttd, fl->expires))
 111                         fl->expires = ttd;
 112                 ttd = fl->expires;
 113                 if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
 114                         struct ipv6_txoptions *opt = fl->opt;
 115                         fl->opt = NULL;
 116                         kfree(opt);
 117                 }
 118                 if (!timer_pending(&ip6_fl_gc_timer) ||
 119                     time_after(ip6_fl_gc_timer.expires, ttd))
 120                         mod_timer(&ip6_fl_gc_timer, ttd);
 121         }
 122         write_unlock_bh(&ip6_fl_lock);
 123 }
 124
 125 static void ip6_fl_gc(unsigned long dummy)
 126 {
 127         int i;
 128         unsigned long now = jiffies;
 129         unsigned long sched = 0;
 130
 131         write_lock(&ip6_fl_lock);
 132
 133         for (i=0; i<=FL_HASH_MASK; i++) {
 134                 struct ip6_flowlabel *fl, **flp;
 135                 flp = &fl_ht[i];
 136                 while ((fl=*flp) != NULL) {
 137                         if (atomic_read(&fl->users) == 0) {
 138                                 unsigned long ttd = fl->lastuse + fl->linger;
 139                                 if (time_after(ttd, fl->expires))
 140                                         fl->expires = ttd;
 141                                 ttd = fl->expires;
 142                                 if (time_after_eq(now, ttd)) {
 143                                         *flp = fl->next;
 144                                         fl_free(fl);
 145                                         atomic_dec(&fl_size);
 146                                         continue;
 147                                 }
 148                                 if (!sched || time_before(ttd, sched))
 149                                         sched = ttd;
 150                         }
 151                         flp = &fl->next;
 152                 }
 153         }
 154         if (!sched && atomic_read(&fl_size))
 155                 sched = now + FL_MAX_LINGER;
 156         if (sched) {
 157                 mod_timer(&ip6_fl_gc_timer, sched);
 158         }
 159         write_unlock(&ip6_fl_lock);
 160 }
 161
 162 static void __net_exit ip6_fl_purge(struct net *net)
 163 {
 164         int i;
 165
 166         write_lock(&ip6_fl_lock);
 167         for (i = 0; i <= FL_HASH_MASK; i++) {
 168                 struct ip6_flowlabel *fl, **flp;
 169                 flp = &fl_ht[i];
 170                 while ((fl = *flp) != NULL) {
 171                         if (net_eq(fl->fl_net, net) &&
 172                             atomic_read(&fl->users) == 0) {
 173                                 *flp = fl->next;
 174                                 fl_free(fl);
 175                                 atomic_dec(&fl_size);
 176                                 continue;
 177                         }
 178                         flp = &fl->next;
 179                 }
 180         }
 181         write_unlock(&ip6_fl_lock);
 182 }
 183
 184 static struct ip6_flowlabel *fl_intern(struct net *net,
 185                                        struct ip6_flowlabel *fl, __be32 label)
 186 {
 187         struct ip6_flowlabel *lfl;
 188
 189         fl->label = label & IPV6_FLOWLABEL_MASK;
 190
 191         write_lock_bh(&ip6_fl_lock);
 192         if (label == 0) {
 193                 for (;;) {
 194                         fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
 195                         if (fl->label) {
 196                                 lfl = __fl_lookup(net, fl->label);
 197                                 if (lfl == NULL)
 198                                         break;
 199                         }
 200                 }
 201         } else {
 202                 /*
 203                  * we dropper the ip6_fl_lock, so this entry could reappear
 204                  * and we need to recheck with it.
 205                  *
 206                  * OTOH no need to search the active socket first, like it is
 207                  * done in ipv6_flowlabel_opt - sock is locked, so new entry
 208                  * with the same label can only appear on another sock
 209                  */
 210                 lfl = __fl_lookup(net, fl->label);
 211                 if (lfl != NULL) {
 212                         atomic_inc(&lfl->users);
 213                         write_unlock_bh(&ip6_fl_lock);
 214                         return lfl;
 215                 }
 216         }
 217
 218         fl->lastuse = jiffies;
 219         fl->next = fl_ht[FL_HASH(fl->label)];
 220         fl_ht[FL_HASH(fl->label)] = fl;
 221         atomic_inc(&fl_size);
 222         write_unlock_bh(&ip6_fl_lock);
 223         return NULL;
 224 }
 225
 226
 227
 228 /* Socket flowlabel lists */
 229
 230 struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 231 {
 232         struct ipv6_fl_socklist *sfl;
 233         struct ipv6_pinfo *np = inet6_sk(sk);
 234
 235         label &= IPV6_FLOWLABEL_MASK;
 236
 237         read_lock_bh(&ip6_sk_fl_lock);
 238         for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
 239                 struct ip6_flowlabel *fl = sfl->fl;
 240                 if (fl->label == label) {
 241                         fl->lastuse = jiffies;
 242                         atomic_inc(&fl->users);
 243                         read_unlock_bh(&ip6_sk_fl_lock);
 244                         return fl;
 245                 }
 246         }
 247         read_unlock_bh(&ip6_sk_fl_lock);
 248         return NULL;
 249 }
 250
 251 EXPORT_SYMBOL_GPL(fl6_sock_lookup);
 252
 253 void fl6_free_socklist(struct sock *sk)
 254 {
 255         struct ipv6_pinfo *np = inet6_sk(sk);
 256         struct ipv6_fl_socklist *sfl;
 257
 258         while ((sfl = np->ipv6_fl_list) != NULL) {
 259                 np->ipv6_fl_list = sfl->next;
 260                 fl_release(sfl->fl);
 261                 kfree(sfl);
 262         }
 263 }
 264
 265 /* Service routines */
 266
 267
 268 /*
 269    It is the only difficult place. flowlabel enforces equal headers
 270    before and including routing header, however user may supply options
 271    following rthdr.
 272  */
 273
 274 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
 275                                          struct ip6_flowlabel * fl,
 276                                          struct ipv6_txoptions * fopt)
 277 {
 278         struct ipv6_txoptions * fl_opt = fl->opt;
 279
 280         if (fopt == NULL || fopt->opt_flen == 0)
 281                 return fl_opt;
 282
 283         if (fl_opt != NULL) {
 284                 opt_space->hopopt = fl_opt->hopopt;
 285                 opt_space->dst0opt = fl_opt->dst0opt;
 286                 opt_space->srcrt = fl_opt->srcrt;
 287                 opt_space->opt_nflen = fl_opt->opt_nflen;
 288         } else {
 289                 if (fopt->opt_nflen == 0)
 290                         return fopt;
 291                 opt_space->hopopt = NULL;
 292                 opt_space->dst0opt = NULL;
 293                 opt_space->srcrt = NULL;
 294                 opt_space->opt_nflen = 0;
 295         }
 296         opt_space->dst1opt = fopt->dst1opt;
 297         opt_space->opt_flen = fopt->opt_flen;
 298         return opt_space;
 299 }
 300 EXPORT_SYMBOL_GPL(fl6_merge_options);
 301
 302 static unsigned long check_linger(unsigned long ttl)
 303 {
 304         if (ttl < FL_MIN_LINGER)
 305                 return FL_MIN_LINGER*HZ;
 306         if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
 307                 return 0;
 308         return ttl*HZ;
 309 }
 310
 311 static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
 312 {
 313         linger = check_linger(linger);
 314         if (!linger)
 315                 return -EPERM;
 316         expires = check_linger(expires);
 317         if (!expires)
 318                 return -EPERM;
 319         fl->lastuse = jiffies;
 320         if (time_before(fl->linger, linger))
 321                 fl->linger = linger;
 322         if (time_before(expires, fl->linger))
 323                 expires = fl->linger;
 324         if (time_before(fl->expires, fl->lastuse + expires))
 325                 fl->expires = fl->lastuse + expires;
 326         return 0;
 327 }
 328
 329 static struct ip6_flowlabel *
 330 fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 331           char __user *optval, int optlen, int *err_p)
 332 {
 333         struct ip6_flowlabel *fl = NULL;
 334         int olen;
 335         int addr_type;
 336         int err;
 337
 338         olen = optlen - CMSG_ALIGN(sizeof(*freq));
 339         err = -EINVAL;
 340         if (olen > 64 * 1024)
 341                 goto done;
 342
 343         err = -ENOMEM;
 344         fl = kzalloc(sizeof(*fl), GFP_KERNEL);
 345         if (fl == NULL)
 346                 goto done;
 347
 348         if (olen > 0) {
 349                 struct msghdr msg;
 350                 struct flowi6 flowi6;
 351                 int junk;
 352
 353                 err = -ENOMEM;
 354                 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
 355                 if (fl->opt == NULL)
 356                         goto done;
 357
 358                 memset(fl->opt, 0, sizeof(*fl->opt));
 359                 fl->opt->tot_len = sizeof(*fl->opt) + olen;
 360                 err = -EFAULT;
 361                 if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
 362                         goto done;
 363
 364                 msg.msg_controllen = olen;
 365                 msg.msg_control = (void*)(fl->opt+1);
 366                 memset(&flowi6, 0, sizeof(flowi6));
 367
 368                 err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
 369                                         &junk, &junk);
 370                 if (err)
 371                         goto done;
 372                 err = -EINVAL;
 373                 if (fl->opt->opt_flen)
 374                         goto done;
 375                 if (fl->opt->opt_nflen == 0) {
 376                         kfree(fl->opt);
 377                         fl->opt = NULL;
 378                 }
 379         }
 380
 381         fl->fl_net = hold_net(net);
 382         fl->expires = jiffies;
 383         err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
 384         if (err)
 385                 goto done;
 386         fl->share = freq->flr_share;
 387         addr_type = ipv6_addr_type(&freq->flr_dst);
 388         if ((addr_type & IPV6_ADDR_MAPPED) ||
 389             addr_type == IPV6_ADDR_ANY) {
 390                 err = -EINVAL;
 391                 goto done;
 392         }
 393         fl->dst = freq->flr_dst;
 394         atomic_set(&fl->users, 1);
 395         switch (fl->share) {
 396         case IPV6_FL_S_EXCL:
 397         case IPV6_FL_S_ANY:
 398                 break;
 399         case IPV6_FL_S_PROCESS:
 400                 fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
 401                 break;
 402         case IPV6_FL_S_USER:
 403                 fl->owner.uid = current_euid();
 404                 break;
 405         default:
 406                 err = -EINVAL;
 407                 goto done;
 408         }
 409         return fl;
 410
 411 done:
 412         fl_free(fl);
 413         *err_p = err;
 414         return NULL;
 415 }
 416
 417 static int mem_check(struct sock *sk)
 418 {
 419         struct ipv6_pinfo *np = inet6_sk(sk);
 420         struct ipv6_fl_socklist *sfl;
 421         int room = FL_MAX_SIZE - atomic_read(&fl_size);
 422         int count = 0;
 423
 424         if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
 425                 return 0;
 426
 427         for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
 428                 count++;
 429
 430         if (room <= 0 ||
 431             ((count >= FL_MAX_PER_SOCK ||
 432               (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
 433              !capable(CAP_NET_ADMIN)))
 434                 return -ENOBUFS;
 435
 436         return 0;
 437 }
 438
 439 static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
 440 {
 441         if (h1 == h2)
 442                 return false;
 443         if (h1 == NULL || h2 == NULL)
 444                 return true;
 445         if (h1->hdrlen != h2->hdrlen)
 446                 return true;
 447         return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
 448 }
 449
 450 static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 451 {
 452         if (o1 == o2)
 453                 return false;
 454         if (o1 == NULL || o2 == NULL)
 455                 return true;
 456         if (o1->opt_nflen != o2->opt_nflen)
 457                 return true;
 458         if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
 459                 return true;
 460         if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
 461                 return true;
 462         if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
 463                 return true;
 464         return false;
 465 }
 466
 467 static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
 468                 struct ip6_flowlabel *fl)
 469 {
 470         write_lock_bh(&ip6_sk_fl_lock);
 471         sfl->fl = fl;
 472         sfl->next = np->ipv6_fl_list;
 473         np->ipv6_fl_list = sfl;
 474         write_unlock_bh(&ip6_sk_fl_lock);
 475 }
 476
 477 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 478 {
 479         int uninitialized_var(err);
 480         struct net *net = sock_net(sk);
 481         struct ipv6_pinfo *np = inet6_sk(sk);
 482         struct in6_flowlabel_req freq;
 483         struct ipv6_fl_socklist *sfl1=NULL;
 484         struct ipv6_fl_socklist *sfl, **sflp;
 485         struct ip6_flowlabel *fl, *fl1 = NULL;
 486
 487
 488         if (optlen < sizeof(freq))
 489                 return -EINVAL;
 490
 491         if (copy_from_user(&freq, optval, sizeof(freq)))
 492                 return -EFAULT;
 493
 494         switch (freq.flr_action) {
 495         case IPV6_FL_A_PUT:
 496                 write_lock_bh(&ip6_sk_fl_lock);
 497                 for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
 498                         if (sfl->fl->label == freq.flr_label) {
 499                                 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 500                                         np->flow_label &= ~IPV6_FLOWLABEL_MASK;
 501                                 *sflp = sfl->next;
 502                                 write_unlock_bh(&ip6_sk_fl_lock);
 503                                 fl_release(sfl->fl);
 504                                 kfree(sfl);
 505                                 return 0;
 506                         }
 507                 }
 508                 write_unlock_bh(&ip6_sk_fl_lock);
 509                 return -ESRCH;
 510
 511         case IPV6_FL_A_RENEW:
 512                 read_lock_bh(&ip6_sk_fl_lock);
 513                 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
 514                         if (sfl->fl->label == freq.flr_label) {
 515                                 err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
 516                                 read_unlock_bh(&ip6_sk_fl_lock);
 517                                 return err;
 518                         }
 519                 }
 520                 read_unlock_bh(&ip6_sk_fl_lock);
 521
 522                 if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
 523                         fl = fl_lookup(net, freq.flr_label);
 524                         if (fl) {
 525                                 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
 526                                 fl_release(fl);
 527                                 return err;
 528                         }
 529                 }
 530                 return -ESRCH;
 531
 532         case IPV6_FL_A_GET:
 533                 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
 534                         return -EINVAL;
 535
 536                 fl = fl_create(net, sk, &freq, optval, optlen, &err);
 537                 if (fl == NULL)
 538                         return err;
 539                 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
 540
 541                 if (freq.flr_label) {
 542                         err = -EEXIST;
 543                         read_lock_bh(&ip6_sk_fl_lock);
 544                         for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
 545                                 if (sfl->fl->label == freq.flr_label) {
 546                                         if (freq.flr_flags&IPV6_FL_F_EXCL) {
 547                                                 read_unlock_bh(&ip6_sk_fl_lock);
 548                                                 goto done;
 549                                         }
 550                                         fl1 = sfl->fl;
 551                                         atomic_inc(&fl1->users);
 552                                         break;
 553                                 }
 554                         }
 555                         read_unlock_bh(&ip6_sk_fl_lock);
 556
 557                         if (fl1 == NULL)
 558                                 fl1 = fl_lookup(net, freq.flr_label);
 559                         if (fl1) {
 560 recheck:
 561                                 err = -EEXIST;
 562                                 if (freq.flr_flags&IPV6_FL_F_EXCL)
 563                                         goto release;
 564                                 err = -EPERM;
 565                                 if (fl1->share == IPV6_FL_S_EXCL ||
 566                                     fl1->share != fl->share ||
 567                                     ((fl1->share == IPV6_FL_S_PROCESS) &&
 568                                      (fl1->owner.pid == fl->owner.pid)) ||
 569                                     ((fl1->share == IPV6_FL_S_USER) &&
 570                                      uid_eq(fl1->owner.uid, fl->owner.uid)))
 571                                         goto release;
 572
 573                                 err = -EINVAL;
 574                                 if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
 575                                     ipv6_opt_cmp(fl1->opt, fl->opt))
 576                                         goto release;
 577
 578                                 err = -ENOMEM;
 579                                 if (sfl1 == NULL)
 580                                         goto release;
 581                                 if (fl->linger > fl1->linger)
 582                                         fl1->linger = fl->linger;
 583                                 if ((long)(fl->expires - fl1->expires) > 0)
 584                                         fl1->expires = fl->expires;
 585                                 fl_link(np, sfl1, fl1);
 586                                 fl_free(fl);
 587                                 return 0;
 588
 589 release:
 590                                 fl_release(fl1);
 591                                 goto done;
 592                         }
 593                 }
 594                 err = -ENOENT;
 595                 if (!(freq.flr_flags&IPV6_FL_F_CREATE))
 596                         goto done;
 597
 598                 err = -ENOMEM;
 599                 if (sfl1 == NULL || (err = mem_check(sk)) != 0)
 600                         goto done;
 601
 602                 fl1 = fl_intern(net, fl, freq.flr_label);
 603                 if (fl1 != NULL)
 604                         goto recheck;
 605
 606                 if (!freq.flr_label) {
 607                         if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
 608                                          &fl->label, sizeof(fl->label))) {
 609                                 /* Intentionally ignore fault. */
 610                         }
 611                 }
 612
 613                 fl_link(np, sfl1, fl);
 614                 return 0;
 615
 616         default:
 617                 return -EINVAL;
 618         }
 619
 620 done:
 621         fl_free(fl);
 622         kfree(sfl1);
 623         return err;
 624 }
 625
 626 #ifdef CONFIG_PROC_FS
 627
 628 struct ip6fl_iter_state {
 629         struct seq_net_private p;
 630         struct pid_namespace *pid_ns;
 631         int bucket;
 632 };
 633
 634 #define ip6fl_seq_private(seq)  ((struct ip6fl_iter_state *)(seq)->private)
 635
 636 static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
 637 {
 638         struct ip6_flowlabel *fl = NULL;
 639         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 640         struct net *net = seq_file_net(seq);
 641
 642         for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
 643                 fl = fl_ht[state->bucket];
 644
 645                 while (fl && !net_eq(fl->fl_net, net))
 646                         fl = fl->next;
 647                 if (fl)
 648                         break;
 649         }
 650         return fl;
 651 }
 652
 653 static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
 654 {
 655         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 656         struct net *net = seq_file_net(seq);
 657
 658         fl = fl->next;
 659 try_again:
 660         while (fl && !net_eq(fl->fl_net, net))
 661                 fl = fl->next;
 662
 663         while (!fl) {
 664                 if (++state->bucket <= FL_HASH_MASK) {
 665                         fl = fl_ht[state->bucket];
 666                         goto try_again;
 667                 } else
 668                         break;
 669         }
 670         return fl;
 671 }
 672
 673 static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
 674 {
 675         struct ip6_flowlabel *fl = ip6fl_get_first(seq);
 676         if (fl)
 677                 while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
 678                         --pos;
 679         return pos ? NULL : fl;
 680 }
 681
 682 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
 683         __acquires(ip6_fl_lock)
 684 {
 685         read_lock_bh(&ip6_fl_lock);
 686         return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 687 }
 688
 689 static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 690 {
 691         struct ip6_flowlabel *fl;
 692
 693         if (v == SEQ_START_TOKEN)
 694                 fl = ip6fl_get_first(seq);
 695         else
 696                 fl = ip6fl_get_next(seq, v);
 697         ++*pos;
 698         return fl;
 699 }
 700
 701 static void ip6fl_seq_stop(struct seq_file *seq, void *v)
 702         __releases(ip6_fl_lock)
 703 {
 704         read_unlock_bh(&ip6_fl_lock);
 705 }
 706
 707 static int ip6fl_seq_show(struct seq_file *seq, void *v)
 708 {
 709         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 710         if (v == SEQ_START_TOKEN)
 711                 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
 712                            "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
 713         else {
 714                 struct ip6_flowlabel *fl = v;
 715                 seq_printf(seq,
 716                            "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
 717                            (unsigned int)ntohl(fl->label),
 718                            fl->share,
 719                            ((fl->share == IPV6_FL_S_PROCESS) ?
 720                             pid_nr_ns(fl->owner.pid, state->pid_ns) :
 721                             ((fl->share == IPV6_FL_S_USER) ?
 722                              from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
 723                              0)),
 724                            atomic_read(&fl->users),
 725                            fl->linger/HZ,
 726                            (long)(fl->expires - jiffies)/HZ,
 727                            &fl->dst,
 728                            fl->opt ? fl->opt->opt_nflen : 0);
 729         }
 730         return 0;
 731 }
 732
 733 static const struct seq_operations ip6fl_seq_ops = {
 734         .start  =       ip6fl_seq_start,
 735         .next   =       ip6fl_seq_next,
 736         .stop   =       ip6fl_seq_stop,
 737         .show   =       ip6fl_seq_show,
 738 };
 739
 740 static int ip6fl_seq_open(struct inode *inode, struct file *file)
 741 {
 742         struct seq_file *seq;
 743         struct ip6fl_iter_state *state;
 744         int err;
 745
 746         err = seq_open_net(inode, file, &ip6fl_seq_ops,
 747                            sizeof(struct ip6fl_iter_state));
 748
 749         if (!err) {
 750                 seq = file->private_data;
 751                 state = ip6fl_seq_private(seq);
 752                 rcu_read_lock();
 753                 state->pid_ns = get_pid_ns(task_active_pid_ns(current));
 754                 rcu_read_unlock();
 755         }
 756         return err;
 757 }
 758
 759 static int ip6fl_seq_release(struct inode *inode, struct file *file)
 760 {
 761         struct seq_file *seq = file->private_data;
 762         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 763         put_pid_ns(state->pid_ns);
 764         return seq_release_net(inode, file);
 765 }
 766
 767 static const struct file_operations ip6fl_seq_fops = {
 768         .owner          =       THIS_MODULE,
 769         .open           =       ip6fl_seq_open,
 770         .read           =       seq_read,
 771         .llseek         =       seq_lseek,
 772         .release        =       ip6fl_seq_release,
 773 };
 774
 775 static int __net_init ip6_flowlabel_proc_init(struct net *net)
 776 {
 777         if (!proc_net_fops_create(net, "ip6_flowlabel",
 778                                   S_IRUGO, &ip6fl_seq_fops))
 779                 return -ENOMEM;
 780         return 0;
 781 }
 782
 783 static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
 784 {
 785         proc_net_remove(net, "ip6_flowlabel");
 786 }
 787 #else
 788 static inline int ip6_flowlabel_proc_init(struct net *net)
 789 {
 790         return 0;
 791 }
 792 static inline void ip6_flowlabel_proc_fini(struct net *net)
 793 {
 794 }
 795 #endif
 796
 797 static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 798 {
 799         ip6_fl_purge(net);
 800         ip6_flowlabel_proc_fini(net);
 801 }
 802
 803 static struct pernet_operations ip6_flowlabel_net_ops = {
 804         .init = ip6_flowlabel_proc_init,
 805         .exit = ip6_flowlabel_net_exit,
 806 };
 807
 808 int ip6_flowlabel_init(void)
 809 {
 810         return register_pernet_subsys(&ip6_flowlabel_net_ops);
 811 }
 812
 813 void ip6_flowlabel_cleanup(void)
 814 {
 815         del_timer(&ip6_fl_gc_timer);
 816         unregister_pernet_subsys(&ip6_flowlabel_net_ops);
 817 }