net/ipv6/ip6_flowlabel.c

   1 /*
   2  *      ip6_flowlabel.c         IPv6 flowlabel manager.
   3  *
   4  *      This program is free software; you can redistribute it and/or
   5  *      modify it under the terms of the GNU General Public License
   6  *      as published by the Free Software Foundation; either version
   7  *      2 of the License, or (at your option) any later version.
   8  *
   9  *      Authors:        Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  */
  11
  12 #include <linux/capability.h>
  13 #include <linux/errno.h>
  14 #include <linux/types.h>
  15 #include <linux/socket.h>
  16 #include <linux/net.h>
  17 #include <linux/netdevice.h>
  18 #include <linux/if_arp.h>
  19 #include <linux/in6.h>
  20 #include <linux/route.h>
  21 #include <linux/proc_fs.h>
  22 #include <linux/seq_file.h>
  23 #include <linux/slab.h>
  24 #include <linux/export.h>
  25 #include <linux/pid_namespace.h>
  26
  27 #include <net/net_namespace.h>
  28 #include <net/sock.h>
  29
  30 #include <net/ipv6.h>
  31 #include <net/ndisc.h>
  32 #include <net/protocol.h>
  33 #include <net/ip6_route.h>
  34 #include <net/addrconf.h>
  35 #include <net/rawv6.h>
  36 #include <net/icmp.h>
  37 #include <net/transp_v6.h>
  38
  39 #include <asm/uaccess.h>
  40
  41 #define FL_MIN_LINGER   6       /* Minimal linger, in seconds. It is set to 6sec as specified
  42                                    in the old IPv6 RFC. Well, it was a reasonable value.
  43                                  */
  44 #define FL_MAX_LINGER   60      /* Maximal linger timeout, in seconds; check_linger() scales by HZ */
  45
  46 /* FL hash table */
  47
  48 #define FL_MAX_PER_SOCK 32      /* per-socket label count limit, enforced in mem_check() */
  49 #define FL_MAX_SIZE     4096    /* table-wide label count limit, enforced in mem_check() */
  50 #define FL_HASH_MASK    255     /* fl_ht has FL_HASH_MASK+1 buckets */
  51 #define FL_HASH(l)      (ntohl(l)&FL_HASH_MASK)
  52
  53 static atomic_t fl_size = ATOMIC_INIT(0);   /* number of labels currently linked into fl_ht */
  54 static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1]; /* chained buckets; entries carry their own fl_net */
  55
  56 static void ip6_fl_gc(unsigned long dummy);
  57 static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
  58
  59 /* FL hash table lock: guards the fl_ht[] chains against the GC timer and concurrent updates */
  60
  61 static DEFINE_RWLOCK(ip6_fl_lock);
  62
  63 /* Big socket lock: one global lock guarding every socket's np->ipv6_fl_list */
  64
  65 static DEFINE_RWLOCK(ip6_sk_fl_lock);
  66
  67
  68 static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
  69 {
  70         struct ip6_flowlabel *fl;
  71
  72         for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
  73                 if (fl->label == label && net_eq(fl->fl_net, net))
  74                         return fl;
  75         }
  76         return NULL;
  77 }
  78
  79 static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
  80 {
  81         struct ip6_flowlabel *fl;
  82
  83         read_lock_bh(&ip6_fl_lock);
  84         fl = __fl_lookup(net, label);
  85         if (fl)
  86                 atomic_inc(&fl->users);
  87         read_unlock_bh(&ip6_fl_lock);
  88         return fl;
  89 }
  90
  91
  92 static void fl_free(struct ip6_flowlabel *fl)
  93 {
  94         if (fl) {
  95                 if (fl->share == IPV6_FL_S_PROCESS)
  96                         put_pid(fl->owner.pid);
  97                 release_net(fl->fl_net);
  98                 kfree(fl->opt);
  99         }
 100         kfree(fl);
 101 }
 102
 103 static void fl_release(struct ip6_flowlabel *fl)
 104 {
 105         write_lock_bh(&ip6_fl_lock);
 106
 107         fl->lastuse = jiffies;
 108         if (atomic_dec_and_test(&fl->users)) {
 109                 unsigned long ttd = fl->lastuse + fl->linger;
 110                 if (time_after(ttd, fl->expires))
 111                         fl->expires = ttd;
 112                 ttd = fl->expires;
 113                 if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
 114                         struct ipv6_txoptions *opt = fl->opt;
 115                         fl->opt = NULL;
 116                         kfree(opt);
 117                 }
 118                 if (!timer_pending(&ip6_fl_gc_timer) ||
 119                     time_after(ip6_fl_gc_timer.expires, ttd))
 120                         mod_timer(&ip6_fl_gc_timer, ttd);
 121         }
 122         write_unlock_bh(&ip6_fl_lock);
 123 }
 124
 125 static void ip6_fl_gc(unsigned long dummy)
 126 {
 127         int i;
 128         unsigned long now = jiffies;
 129         unsigned long sched = 0;
 130
 131         write_lock(&ip6_fl_lock);
 132
 133         for (i=0; i<=FL_HASH_MASK; i++) {
 134                 struct ip6_flowlabel *fl, **flp;
 135                 flp = &fl_ht[i];
 136                 while ((fl=*flp) != NULL) {
 137                         if (atomic_read(&fl->users) == 0) {
 138                                 unsigned long ttd = fl->lastuse + fl->linger;
 139                                 if (time_after(ttd, fl->expires))
 140                                         fl->expires = ttd;
 141                                 ttd = fl->expires;
 142                                 if (time_after_eq(now, ttd)) {
 143                                         *flp = fl->next;
 144                                         fl_free(fl);
 145                                         atomic_dec(&fl_size);
 146                                         continue;
 147                                 }
 148                                 if (!sched || time_before(ttd, sched))
 149                                         sched = ttd;
 150                         }
 151                         flp = &fl->next;
 152                 }
 153         }
 154         if (!sched && atomic_read(&fl_size))
 155                 sched = now + FL_MAX_LINGER;
 156         if (sched) {
 157                 mod_timer(&ip6_fl_gc_timer, sched);
 158         }
 159         write_unlock(&ip6_fl_lock);
 160 }
 161
 162 static void __net_exit ip6_fl_purge(struct net *net)
 163 {
 164         int i;
 165
 166         write_lock(&ip6_fl_lock);
 167         for (i = 0; i <= FL_HASH_MASK; i++) {
 168                 struct ip6_flowlabel *fl, **flp;
 169                 flp = &fl_ht[i];
 170                 while ((fl = *flp) != NULL) {
 171                         if (net_eq(fl->fl_net, net) &&
 172                             atomic_read(&fl->users) == 0) {
 173                                 *flp = fl->next;
 174                                 fl_free(fl);
 175                                 atomic_dec(&fl_size);
 176                                 continue;
 177                         }
 178                         flp = &fl->next;
 179                 }
 180         }
 181         write_unlock(&ip6_fl_lock);
 182 }
 183
 184 static struct ip6_flowlabel *fl_intern(struct net *net,
 185                                        struct ip6_flowlabel *fl, __be32 label)
 186 {
 187         struct ip6_flowlabel *lfl;
 188
 189         fl->label = label & IPV6_FLOWLABEL_MASK;
 190
 191         write_lock_bh(&ip6_fl_lock);
 192         if (label == 0) {
 193                 for (;;) {
 194                         fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
 195                         if (fl->label) {
 196                                 lfl = __fl_lookup(net, fl->label);
 197                                 if (lfl == NULL)
 198                                         break;
 199                         }
 200                 }
 201         } else {
 202                 /*
 203                  * we dropper the ip6_fl_lock, so this entry could reappear
 204                  * and we need to recheck with it.
 205                  *
 206                  * OTOH no need to search the active socket first, like it is
 207                  * done in ipv6_flowlabel_opt - sock is locked, so new entry
 208                  * with the same label can only appear on another sock
 209                  */
 210                 lfl = __fl_lookup(net, fl->label);
 211                 if (lfl != NULL) {
 212                         atomic_inc(&lfl->users);
 213                         write_unlock_bh(&ip6_fl_lock);
 214                         return lfl;
 215                 }
 216         }
 217
 218         fl->lastuse = jiffies;
 219         fl->next = fl_ht[FL_HASH(fl->label)];
 220         fl_ht[FL_HASH(fl->label)] = fl;
 221         atomic_inc(&fl_size);
 222         write_unlock_bh(&ip6_fl_lock);
 223         return NULL;
 224 }
 225
 226
 227
 228 /* Socket flowlabel lists */
 229
 230 struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 231 {
 232         struct ipv6_fl_socklist *sfl;
 233         struct ipv6_pinfo *np = inet6_sk(sk);
 234
 235         label &= IPV6_FLOWLABEL_MASK;
 236
 237         read_lock_bh(&ip6_sk_fl_lock);
 238         for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
 239                 struct ip6_flowlabel *fl = sfl->fl;
 240                 if (fl->label == label) {
 241                         fl->lastuse = jiffies;
 242                         atomic_inc(&fl->users);
 243                         read_unlock_bh(&ip6_sk_fl_lock);
 244                         return fl;
 245                 }
 246         }
 247         read_unlock_bh(&ip6_sk_fl_lock);
 248         return NULL;
 249 }
 250
 251 EXPORT_SYMBOL_GPL(fl6_sock_lookup);
 252
 253 void fl6_free_socklist(struct sock *sk)
 254 {
 255         struct ipv6_pinfo *np = inet6_sk(sk);
 256         struct ipv6_fl_socklist *sfl;
 257
 258         while ((sfl = np->ipv6_fl_list) != NULL) {
 259                 np->ipv6_fl_list = sfl->next;
 260                 fl_release(sfl->fl);
 261                 kfree(sfl);
 262         }
 263 }
 264
 265 /* Service routines */
 266
 267
 268 /*
 269    It is the only difficult place. flowlabel enforces equal headers
 270    before and including routing header, however user may supply options
 271    following rthdr.
 272  */
 273
 274 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
 275                                          struct ip6_flowlabel * fl,
 276                                          struct ipv6_txoptions * fopt)
 277 {
 278         struct ipv6_txoptions * fl_opt = fl->opt;
 279
 280         if (fopt == NULL || fopt->opt_flen == 0)
 281                 return fl_opt;
 282
 283         if (fl_opt != NULL) {
 284                 opt_space->hopopt = fl_opt->hopopt;
 285                 opt_space->dst0opt = fl_opt->dst0opt;
 286                 opt_space->srcrt = fl_opt->srcrt;
 287                 opt_space->opt_nflen = fl_opt->opt_nflen;
 288         } else {
 289                 if (fopt->opt_nflen == 0)
 290                         return fopt;
 291                 opt_space->hopopt = NULL;
 292                 opt_space->dst0opt = NULL;
 293                 opt_space->srcrt = NULL;
 294                 opt_space->opt_nflen = 0;
 295         }
 296         opt_space->dst1opt = fopt->dst1opt;
 297         opt_space->opt_flen = fopt->opt_flen;
 298         return opt_space;
 299 }
 300 EXPORT_SYMBOL_GPL(fl6_merge_options);
 301
 302 static unsigned long check_linger(unsigned long ttl)
 303 {
 304         if (ttl < FL_MIN_LINGER)
 305                 return FL_MIN_LINGER*HZ;
 306         if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
 307                 return 0;
 308         return ttl*HZ;
 309 }
 310
 311 static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
 312 {
 313         linger = check_linger(linger);
 314         if (!linger)
 315                 return -EPERM;
 316         expires = check_linger(expires);
 317         if (!expires)
 318                 return -EPERM;
 319         fl->lastuse = jiffies;
 320         if (time_before(fl->linger, linger))
 321                 fl->linger = linger;
 322         if (time_before(expires, fl->linger))
 323                 expires = fl->linger;
 324         if (time_before(fl->expires, fl->lastuse + expires))
 325                 fl->expires = fl->lastuse + expires;
 326         return 0;
 327 }
 328
 329 static struct ip6_flowlabel *
 330 fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 331           char __user *optval, int optlen, int *err_p)
 332 {
 333         struct ip6_flowlabel *fl = NULL;
 334         int olen;
 335         int addr_type;
 336         int err;
 337
 338         olen = optlen - CMSG_ALIGN(sizeof(*freq));
 339         err = -EINVAL;
 340         if (olen > 64 * 1024)
 341                 goto done;
 342
 343         err = -ENOMEM;
 344         fl = kzalloc(sizeof(*fl), GFP_KERNEL);
 345         if (fl == NULL)
 346                 goto done;
 347
 348         if (olen > 0) {
 349                 struct msghdr msg;
 350                 struct flowi6 flowi6;
 351                 int junk;
 352
 353                 err = -ENOMEM;
 354                 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
 355                 if (fl->opt == NULL)
 356                         goto done;
 357
 358                 memset(fl->opt, 0, sizeof(*fl->opt));
 359                 fl->opt->tot_len = sizeof(*fl->opt) + olen;
 360                 err = -EFAULT;
 361                 if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
 362                         goto done;
 363
 364                 msg.msg_controllen = olen;
 365                 msg.msg_control = (void*)(fl->opt+1);
 366                 memset(&flowi6, 0, sizeof(flowi6));
 367
 368                 err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
 369                                             &junk, &junk, &junk);
 370                 if (err)
 371                         goto done;
 372                 err = -EINVAL;
 373                 if (fl->opt->opt_flen)
 374                         goto done;
 375                 if (fl->opt->opt_nflen == 0) {
 376                         kfree(fl->opt);
 377                         fl->opt = NULL;
 378                 }
 379         }
 380
 381         fl->fl_net = hold_net(net);
 382         fl->expires = jiffies;
 383         err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
 384         if (err)
 385                 goto done;
 386         fl->share = freq->flr_share;
 387         addr_type = ipv6_addr_type(&freq->flr_dst);
 388         if ((addr_type & IPV6_ADDR_MAPPED) ||
 389             addr_type == IPV6_ADDR_ANY) {
 390                 err = -EINVAL;
 391                 goto done;
 392         }
 393         fl->dst = freq->flr_dst;
 394         atomic_set(&fl->users, 1);
 395         switch (fl->share) {
 396         case IPV6_FL_S_EXCL:
 397         case IPV6_FL_S_ANY:
 398                 break;
 399         case IPV6_FL_S_PROCESS:
 400                 fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
 401                 break;
 402         case IPV6_FL_S_USER:
 403                 fl->owner.uid = current_euid();
 404                 break;
 405         default:
 406                 err = -EINVAL;
 407                 goto done;
 408         }
 409         return fl;
 410
 411 done:
 412         fl_free(fl);
 413         *err_p = err;
 414         return NULL;
 415 }
 416
 417 static int mem_check(struct sock *sk)
 418 {
 419         struct ipv6_pinfo *np = inet6_sk(sk);
 420         struct ipv6_fl_socklist *sfl;
 421         int room = FL_MAX_SIZE - atomic_read(&fl_size);
 422         int count = 0;
 423
 424         if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
 425                 return 0;
 426
 427         for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
 428                 count++;
 429
 430         if (room <= 0 ||
 431             ((count >= FL_MAX_PER_SOCK ||
 432               (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
 433              !capable(CAP_NET_ADMIN)))
 434                 return -ENOBUFS;
 435
 436         return 0;
 437 }
 438
 439 static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
 440 {
 441         if (h1 == h2)
 442                 return false;
 443         if (h1 == NULL || h2 == NULL)
 444                 return true;
 445         if (h1->hdrlen != h2->hdrlen)
 446                 return true;
 447         return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
 448 }
 449
 450 static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 451 {
 452         if (o1 == o2)
 453                 return false;
 454         if (o1 == NULL || o2 == NULL)
 455                 return true;
 456         if (o1->opt_nflen != o2->opt_nflen)
 457                 return true;
 458         if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
 459                 return true;
 460         if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
 461                 return true;
 462         if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
 463                 return true;
 464         return false;
 465 }
 466
 467 static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
 468                 struct ip6_flowlabel *fl)
 469 {
 470         write_lock_bh(&ip6_sk_fl_lock);
 471         sfl->fl = fl;
 472         sfl->next = np->ipv6_fl_list;
 473         np->ipv6_fl_list = sfl;
 474         write_unlock_bh(&ip6_sk_fl_lock);
 475 }
 476
 477 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 478 {
 479         int uninitialized_var(err);
 480         struct net *net = sock_net(sk);
 481         struct ipv6_pinfo *np = inet6_sk(sk);
 482         struct in6_flowlabel_req freq;
 483         struct ipv6_fl_socklist *sfl1=NULL;
 484         struct ipv6_fl_socklist *sfl, **sflp;
 485         struct ip6_flowlabel *fl, *fl1 = NULL;
 486
 487
 488         if (optlen < sizeof(freq))
 489                 return -EINVAL;
 490
 491         if (copy_from_user(&freq, optval, sizeof(freq)))
 492                 return -EFAULT;
 493
 494         switch (freq.flr_action) {
 495         case IPV6_FL_A_PUT:
 496                 write_lock_bh(&ip6_sk_fl_lock);
 497                 for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
 498                         if (sfl->fl->label == freq.flr_label) {
 499                                 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 500                                         np->flow_label &= ~IPV6_FLOWLABEL_MASK;
 501                                 *sflp = sfl->next;
 502                                 write_unlock_bh(&ip6_sk_fl_lock);
 503                                 fl_release(sfl->fl);
 504                                 kfree(sfl);
 505                                 return 0;
 506                         }
 507                 }
 508                 write_unlock_bh(&ip6_sk_fl_lock);
 509                 return -ESRCH;
 510
 511         case IPV6_FL_A_RENEW:
 512                 read_lock_bh(&ip6_sk_fl_lock);
 513                 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
 514                         if (sfl->fl->label == freq.flr_label) {
 515                                 err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
 516                                 read_unlock_bh(&ip6_sk_fl_lock);
 517                                 return err;
 518                         }
 519                 }
 520                 read_unlock_bh(&ip6_sk_fl_lock);
 521
 522                 if (freq.flr_share == IPV6_FL_S_NONE &&
 523                     ns_capable(net->user_ns, CAP_NET_ADMIN)) {
 524                         fl = fl_lookup(net, freq.flr_label);
 525                         if (fl) {
 526                                 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
 527                                 fl_release(fl);
 528                                 return err;
 529                         }
 530                 }
 531                 return -ESRCH;
 532
 533         case IPV6_FL_A_GET:
 534                 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
 535                         return -EINVAL;
 536
 537                 fl = fl_create(net, sk, &freq, optval, optlen, &err);
 538                 if (fl == NULL)
 539                         return err;
 540                 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
 541
 542                 if (freq.flr_label) {
 543                         err = -EEXIST;
 544                         read_lock_bh(&ip6_sk_fl_lock);
 545                         for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
 546                                 if (sfl->fl->label == freq.flr_label) {
 547                                         if (freq.flr_flags&IPV6_FL_F_EXCL) {
 548                                                 read_unlock_bh(&ip6_sk_fl_lock);
 549                                                 goto done;
 550                                         }
 551                                         fl1 = sfl->fl;
 552                                         atomic_inc(&fl1->users);
 553                                         break;
 554                                 }
 555                         }
 556                         read_unlock_bh(&ip6_sk_fl_lock);
 557
 558                         if (fl1 == NULL)
 559                                 fl1 = fl_lookup(net, freq.flr_label);
 560                         if (fl1) {
 561 recheck:
 562                                 err = -EEXIST;
 563                                 if (freq.flr_flags&IPV6_FL_F_EXCL)
 564                                         goto release;
 565                                 err = -EPERM;
 566                                 if (fl1->share == IPV6_FL_S_EXCL ||
 567                                     fl1->share != fl->share ||
 568                                     ((fl1->share == IPV6_FL_S_PROCESS) &&
 569                                      (fl1->owner.pid == fl->owner.pid)) ||
 570                                     ((fl1->share == IPV6_FL_S_USER) &&
 571                                      uid_eq(fl1->owner.uid, fl->owner.uid)))
 572                                         goto release;
 573
 574                                 err = -EINVAL;
 575                                 if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
 576                                     ipv6_opt_cmp(fl1->opt, fl->opt))
 577                                         goto release;
 578
 579                                 err = -ENOMEM;
 580                                 if (sfl1 == NULL)
 581                                         goto release;
 582                                 if (fl->linger > fl1->linger)
 583                                         fl1->linger = fl->linger;
 584                                 if ((long)(fl->expires - fl1->expires) > 0)
 585                                         fl1->expires = fl->expires;
 586                                 fl_link(np, sfl1, fl1);
 587                                 fl_free(fl);
 588                                 return 0;
 589
 590 release:
 591                                 fl_release(fl1);
 592                                 goto done;
 593                         }
 594                 }
 595                 err = -ENOENT;
 596                 if (!(freq.flr_flags&IPV6_FL_F_CREATE))
 597                         goto done;
 598
 599                 err = -ENOMEM;
 600                 if (sfl1 == NULL || (err = mem_check(sk)) != 0)
 601                         goto done;
 602
 603                 fl1 = fl_intern(net, fl, freq.flr_label);
 604                 if (fl1 != NULL)
 605                         goto recheck;
 606
 607                 if (!freq.flr_label) {
 608                         if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
 609                                          &fl->label, sizeof(fl->label))) {
 610                                 /* Intentionally ignore fault. */
 611                         }
 612                 }
 613
 614                 fl_link(np, sfl1, fl);
 615                 return 0;
 616
 617         default:
 618                 return -EINVAL;
 619         }
 620
 621 done:
 622         fl_free(fl);
 623         kfree(sfl1);
 624         return err;
 625 }
 626
 627 #ifdef CONFIG_PROC_FS
 628
 629 struct ip6fl_iter_state {
             /* Per-open iterator state of the ip6_flowlabel seq_file. */
 630         struct seq_net_private p;       /* NOTE(review): presumably must stay first for seq_open_net() - confirm */
 631         struct pid_namespace *pid_ns;   /* taken in ip6fl_seq_open(), dropped in ip6fl_seq_release() */
 632         int bucket;                     /* fl_ht bucket the walk is currently in */
 633 };
 634
     /* Fetch the iterator state stored in seq->private. */
 635 #define ip6fl_seq_private(seq)  ((struct ip6fl_iter_state *)(seq)->private)
 636
 637 static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
 638 {
 639         struct ip6_flowlabel *fl = NULL;
 640         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 641         struct net *net = seq_file_net(seq);
 642
 643         for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
 644                 fl = fl_ht[state->bucket];
 645
 646                 while (fl && !net_eq(fl->fl_net, net))
 647                         fl = fl->next;
 648                 if (fl)
 649                         break;
 650         }
 651         return fl;
 652 }
 653
 654 static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
 655 {
 656         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 657         struct net *net = seq_file_net(seq);
 658
 659         fl = fl->next;
 660 try_again:
 661         while (fl && !net_eq(fl->fl_net, net))
 662                 fl = fl->next;
 663
 664         while (!fl) {
 665                 if (++state->bucket <= FL_HASH_MASK) {
 666                         fl = fl_ht[state->bucket];
 667                         goto try_again;
 668                 } else
 669                         break;
 670         }
 671         return fl;
 672 }
 673
 674 static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
 675 {
 676         struct ip6_flowlabel *fl = ip6fl_get_first(seq);
 677         if (fl)
 678                 while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
 679                         --pos;
 680         return pos ? NULL : fl;
 681 }
 682
 683 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
 684         __acquires(ip6_fl_lock)
 685 {
 686         read_lock_bh(&ip6_fl_lock);
 687         return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 688 }
 689
 690 static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 691 {
 692         struct ip6_flowlabel *fl;
 693
 694         if (v == SEQ_START_TOKEN)
 695                 fl = ip6fl_get_first(seq);
 696         else
 697                 fl = ip6fl_get_next(seq, v);
 698         ++*pos;
 699         return fl;
 700 }
 701
 702 static void ip6fl_seq_stop(struct seq_file *seq, void *v)
 703         __releases(ip6_fl_lock)
 704 {
 705         read_unlock_bh(&ip6_fl_lock);
 706 }
 707
 708 static int ip6fl_seq_show(struct seq_file *seq, void *v)
 709 {
 710         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 711         if (v == SEQ_START_TOKEN)
 712                 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
 713                            "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
 714         else {
 715                 struct ip6_flowlabel *fl = v;
 716                 seq_printf(seq,
 717                            "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
 718                            (unsigned int)ntohl(fl->label),
 719                            fl->share,
 720                            ((fl->share == IPV6_FL_S_PROCESS) ?
 721                             pid_nr_ns(fl->owner.pid, state->pid_ns) :
 722                             ((fl->share == IPV6_FL_S_USER) ?
 723                              from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
 724                              0)),
 725                            atomic_read(&fl->users),
 726                            fl->linger/HZ,
 727                            (long)(fl->expires - jiffies)/HZ,
 728                            &fl->dst,
 729                            fl->opt ? fl->opt->opt_nflen : 0);
 730         }
 731         return 0;
 732 }
 733
 734 static const struct seq_operations ip6fl_seq_ops = {
             /* Iterator callbacks; start takes ip6_fl_lock and stop releases it. */
 735         .start  =       ip6fl_seq_start,
 736         .next   =       ip6fl_seq_next,
 737         .stop   =       ip6fl_seq_stop,
 738         .show   =       ip6fl_seq_show,
 739 };
 740
 741 static int ip6fl_seq_open(struct inode *inode, struct file *file)
 742 {
 743         struct seq_file *seq;
 744         struct ip6fl_iter_state *state;
 745         int err;
 746
 747         err = seq_open_net(inode, file, &ip6fl_seq_ops,
 748                            sizeof(struct ip6fl_iter_state));
 749
 750         if (!err) {
 751                 seq = file->private_data;
 752                 state = ip6fl_seq_private(seq);
 753                 rcu_read_lock();
 754                 state->pid_ns = get_pid_ns(task_active_pid_ns(current));
 755                 rcu_read_unlock();
 756         }
 757         return err;
 758 }
 759
 760 static int ip6fl_seq_release(struct inode *inode, struct file *file)
 761 {
 762         struct seq_file *seq = file->private_data;
 763         struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 764         put_pid_ns(state->pid_ns);
 765         return seq_release_net(inode, file);
 766 }
 767
 768 static const struct file_operations ip6fl_seq_fops = {
             /* File operations of the per-namespace "ip6_flowlabel" proc entry. */
 769         .owner          =       THIS_MODULE,
 770         .open           =       ip6fl_seq_open,
 771         .read           =       seq_read,
 772         .llseek         =       seq_lseek,
 773         .release        =       ip6fl_seq_release,
 774 };
 775
 776 static int __net_init ip6_flowlabel_proc_init(struct net *net)
 777 {
 778         if (!proc_net_fops_create(net, "ip6_flowlabel",
 779                                   S_IRUGO, &ip6fl_seq_fops))
 780                 return -ENOMEM;
 781         return 0;
 782 }
 783
 784 static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
 785 {
 786         proc_net_remove(net, "ip6_flowlabel");
 787 }
 788 #else
 789 static inline int ip6_flowlabel_proc_init(struct net *net)
 790 {
 791         return 0;
 792 }
 793 static inline void ip6_flowlabel_proc_fini(struct net *net)
 794 {
 795 }
 796 #endif
 797
 798 static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 799 {
 800         ip6_fl_purge(net);
 801         ip6_flowlabel_proc_fini(net);
 802 }
 803
 804 static struct pernet_operations ip6_flowlabel_net_ops = {
             /* init creates the proc entry; exit purges labels, then removes it. */
 805         .init = ip6_flowlabel_proc_init,
 806         .exit = ip6_flowlabel_net_exit,
 807 };
 808
 809 int ip6_flowlabel_init(void)
 810 {
 811         return register_pernet_subsys(&ip6_flowlabel_net_ops);
 812 }
 813
 814 void ip6_flowlabel_cleanup(void)
 815 {
 816         del_timer(&ip6_fl_gc_timer);
 817         unregister_pernet_subsys(&ip6_flowlabel_net_ops);
 818 }