usr/src/uts/common/syscall/lgrpsys.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28
  29 /*
  30  * lgroup system calls
  31  */
  32
  33 #include <sys/types.h>
  34 #include <sys/errno.h>
  35 #include <sys/sunddi.h>
  36 #include <sys/systm.h>
  37 #include <sys/mman.h>
  38 #include <sys/cpupart.h>
  39 #include <sys/lgrp.h>
  40 #include <sys/lgrp_user.h>
  41 #include <sys/promif.h>         /* for prom_printf() */
  42 #include <sys/sysmacros.h>
  43
  44 #include <vm/as.h>
  45
  46
  47 /* definitions for mi_validity */
  48 #define VALID_ADDR      1
  49 #define VALID_REQ       2
  50
  51 /*
  52  * run through the given number of addresses and requests and return the
  53  * corresponding memory information for each address
  54  */
  55 static int
  56 meminfo(int addr_count, struct meminfo *mip)
  57 {
  58         size_t          in_size, out_size, req_size, val_size;
  59         struct as       *as;
  60         struct hat      *hat;
  61         int             i, j, out_idx, info_count;
  62         lgrp_t          *lgrp;
  63         pfn_t           pfn;
  64         ssize_t         pgsz;
  65         int             *req_array, *val_array;
  66         uint64_t        *in_array, *out_array;
  67         uint64_t        addr, paddr;
  68         uintptr_t       vaddr;
  69         int             ret = 0;
  70         struct meminfo minfo;
  71 #if defined(_SYSCALL32_IMPL)
  72         struct meminfo32 minfo32;
  73 #endif
  74
  75         /*
  76          * Make sure that there is at least one address to translate and
  77          * limit how many virtual addresses the kernel can do per call
  78          */
  79         if (addr_count < 1)
  80                 return (set_errno(EINVAL));
  81         else if (addr_count > MAX_MEMINFO_CNT)
  82                 addr_count = MAX_MEMINFO_CNT;
  83
  84         if (get_udatamodel() == DATAMODEL_NATIVE) {
  85                 if (copyin(mip, &minfo, sizeof (struct meminfo)))
  86                         return (set_errno(EFAULT));
  87         }
  88 #if defined(_SYSCALL32_IMPL)
  89         else {
  90                 bzero(&minfo, sizeof (minfo));
  91                 if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
  92                         return (set_errno(EFAULT));
  93                 minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
  94                     minfo32.mi_inaddr;
  95                 minfo.mi_info_req = (const uint_t *)(uintptr_t)
  96                     minfo32.mi_info_req;
  97                 minfo.mi_info_count = minfo32.mi_info_count;
  98                 minfo.mi_outdata = (uint64_t *)(uintptr_t)
  99                     minfo32.mi_outdata;
 100                 minfo.mi_validity = (uint_t *)(uintptr_t)
 101                     minfo32.mi_validity;
 102         }
 103 #endif
 104         /*
 105          * all the input parameters have been copied in:-
 106          * addr_count - number of input addresses
 107          * minfo.mi_inaddr - array of input addresses
 108          * minfo.mi_info_req - array of types of information requested
 109          * minfo.mi_info_count - no. of pieces of info requested for each addr
 110          * minfo.mi_outdata - array into which the results are placed
 111          * minfo.mi_validity -  array containing bitwise result codes; 0th bit
 112          *                      evaluates validity of corresponding input
 113          *                      address, 1st bit validity of response to first
 114          *                      member of info_req, etc.
 115          */
 116
 117         /* make sure mi_info_count is within limit */
 118         info_count = minfo.mi_info_count;
 119         if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
 120                 return (set_errno(EINVAL));
 121
 122         /*
 123          * allocate buffer in_array for the input addresses and copy them in
 124          */
 125         in_size = sizeof (uint64_t) * addr_count;
 126         in_array = kmem_alloc(in_size, KM_SLEEP);
 127         if (copyin(minfo.mi_inaddr, in_array, in_size)) {
 128                 kmem_free(in_array, in_size);
 129                 return (set_errno(EFAULT));
 130         }
 131
 132         /*
 133          * allocate buffer req_array for the input info_reqs and copy them in
 134          */
 135         req_size = sizeof (uint_t) * info_count;
 136         req_array = kmem_alloc(req_size, KM_SLEEP);
 137         if (copyin(minfo.mi_info_req, req_array, req_size)) {
 138                 kmem_free(req_array, req_size);
 139                 kmem_free(in_array, in_size);
 140                 return (set_errno(EFAULT));
 141         }
 142
 143         /*
 144          * allocate buffer out_array which holds the results and will have
 145          * to be copied out later
 146          */
 147         out_size = sizeof (uint64_t) * addr_count * info_count;
 148         out_array = kmem_alloc(out_size, KM_SLEEP);
 149
 150         /*
 151          * allocate buffer val_array which holds the validity bits and will
 152          * have to be copied out later
 153          */
 154         val_size = sizeof (uint_t) * addr_count;
 155         val_array = kmem_alloc(val_size, KM_SLEEP);
 156
 157         if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
 158                 /* find the corresponding lgroup for each physical address */
 159                 for (i = 0; i < addr_count; i++) {
 160                         paddr = in_array[i];
 161                         pfn = btop(paddr);
 162                         lgrp = lgrp_pfn_to_lgrp(pfn);
 163                         if (lgrp) {
 164                                 out_array[i] = lgrp->lgrp_id;
 165                                 val_array[i] = VALID_ADDR | VALID_REQ;
 166                         } else {
 167                                 out_array[i] = NULL;
 168                                 val_array[i] = 0;
 169                         }
 170                 }
 171         } else {
 172                 /* get the corresponding memory info for each virtual address */
 173                 as = curproc->p_as;
 174
 175                 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 176                 hat = as->a_hat;
 177                 for (i = out_idx = 0; i < addr_count; i++, out_idx +=
 178                     info_count) {
 179                         addr = in_array[i];
 180                         vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
 181                         if (!as_segat(as, (caddr_t)vaddr)) {
 182                                 val_array[i] = 0;
 183                                 continue;
 184                         }
 185                         val_array[i] = VALID_ADDR;
 186                         pfn = hat_getpfnum(hat, (caddr_t)vaddr);
 187                         if (pfn != PFN_INVALID) {
 188                                 paddr = (uint64_t)((pfn << PAGESHIFT) |
 189                                         (addr & PAGEOFFSET));
 190                                 for (j = 0; j < info_count; j++) {
 191                                         switch (req_array[j] & MEMINFO_MASK) {
 192                                         case MEMINFO_VPHYSICAL:
 193                                                 /*
 194                                                  * return the physical address
 195                                                  * corresponding to the input
 196                                                  * virtual address
 197                                                  */
 198                                                 out_array[out_idx + j] = paddr;
 199                                                 val_array[i] |= VALID_REQ << j;
 200                                                 break;
 201                                         case MEMINFO_VLGRP:
 202                                                 /*
 203                                                  * return the lgroup of physical
 204                                                  * page corresponding to the
 205                                                  * input virtual address
 206                                                  */
 207                                                 lgrp = lgrp_pfn_to_lgrp(pfn);
 208                                                 if (lgrp) {
 209                                                         out_array[out_idx + j] =
 210                                                                 lgrp->lgrp_id;
 211                                                         val_array[i] |=
 212                                                                 VALID_REQ << j;
 213                                                 }
 214                                                 break;
 215                                         case MEMINFO_VPAGESIZE:
 216                                                 /*
 217                                                  * return the size of physical
 218                                                  * page corresponding to the
 219                                                  * input virtual address
 220                                                  */
 221                                                 pgsz = hat_getpagesize(hat,
 222                                                         (caddr_t)vaddr);
 223                                                 if (pgsz != -1) {
 224                                                         out_array[out_idx + j] =
 225                                                                         pgsz;
 226                                                         val_array[i] |=
 227                                                                 VALID_REQ << j;
 228                                                 }
 229                                                 break;
 230                                         case MEMINFO_VREPLCNT:
 231                                                 /*
 232                                                  * for future use:-
 233                                                  * return the no. replicated
 234                                                  * physical pages corresponding
 235                                                  * to the input virtual address,
 236                                                  * so it is always 0 at the
 237                                                  * moment
 238                                                  */
 239                                                 out_array[out_idx + j] = 0;
 240                                                 val_array[i] |= VALID_REQ << j;
 241                                                 break;
 242                                         case MEMINFO_VREPL:
 243                                                 /*
 244                                                  * for future use:-
 245                                                  * return the nth physical
 246                                                  * replica of the specified
 247                                                  * virtual address
 248                                                  */
 249                                                 break;
 250                                         case MEMINFO_VREPL_LGRP:
 251                                                 /*
 252                                                  * for future use:-
 253                                                  * return the lgroup of nth
 254                                                  * physical replica of the
 255                                                  * specified virtual address
 256                                                  */
 257                                                 break;
 258                                         case MEMINFO_PLGRP:
 259                                                 /*
 260                                                  * this is for physical address
 261                                                  * only, shouldn't mix with
 262                                                  * virtual address
 263                                                  */
 264                                                 break;
 265                                         default:
 266                                                 break;
 267                                         }
 268                                 }
 269                         }
 270                 }
 271                 AS_LOCK_EXIT(as, &as->a_lock);
 272         }
 273
 274         /* copy out the results and validity bits and free the buffers */
 275         if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
 276                 (copyout(val_array, minfo.mi_validity, val_size) != 0))
 277                 ret = set_errno(EFAULT);
 278
 279         kmem_free(in_array, in_size);
 280         kmem_free(out_array, out_size);
 281         kmem_free(req_array, req_size);
 282         kmem_free(val_array, val_size);
 283
 284         return (ret);
 285 }
 286
 287
 288 /*
 289  * Initialize lgroup affinities for thread
 290  */
 291 void
 292 lgrp_affinity_init(lgrp_affinity_t **bufaddr)
 293 {
 294         if (bufaddr)
 295                 *bufaddr = NULL;
 296 }
 297
 298
 299 /*
 300  * Free lgroup affinities for thread and set to NULL
 301  * just in case thread gets recycled
 302  */
 303 void
 304 lgrp_affinity_free(lgrp_affinity_t **bufaddr)
 305 {
 306         if (bufaddr && *bufaddr) {
 307                 kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
 308                 *bufaddr = NULL;
 309         }
 310 }
 311
 312
 313 #define P_ANY   -2      /* cookie specifying any ID */
 314
 315
 316 /*
 317  * Find LWP with given ID in specified process and get its affinity for
 318  * specified lgroup
 319  */
 320 lgrp_affinity_t
 321 lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
 322 {
 323         lgrp_affinity_t aff;
 324         int             found;
 325         kthread_t       *t;
 326
 327         ASSERT(MUTEX_HELD(&p->p_lock));
 328
 329         aff = LGRP_AFF_NONE;
 330         found = 0;
 331         t = p->p_tlist;
 332         /*
 333          * The process may be executing in proc_exit() and its p->p_list may be
 334          * already NULL.
 335          */
 336         if (t == NULL)
 337                 return (set_errno(ESRCH));
 338
 339         do {
 340                 if (t->t_tid == lwpid || lwpid == P_ANY) {
 341                         thread_lock(t);
 342                         /*
 343                          * Check to see whether caller has permission to set
 344                          * affinity for LWP
 345                          */
 346                         if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
 347                                 thread_unlock(t);
 348                                 return (set_errno(EPERM));
 349                         }
 350
 351                         if (t->t_lgrp_affinity)
 352                                 aff = t->t_lgrp_affinity[lgrp];
 353                         thread_unlock(t);
 354                         found = 1;
 355                         break;
 356                 }
 357         } while ((t = t->t_forw) != p->p_tlist);
 358         if (!found)
 359                 aff = set_errno(ESRCH);
 360
 361         return (aff);
 362 }
 363
 364
 365 /*
 366  * Get lgroup affinity for given LWP
 367  */
 368 lgrp_affinity_t
 369 lgrp_affinity_get(lgrp_affinity_args_t *ap)
 370 {
 371         lgrp_affinity_t         aff;
 372         lgrp_affinity_args_t    args;
 373         id_t                    id;
 374         idtype_t                idtype;
 375         lgrp_id_t               lgrp;
 376         proc_t                  *p;
 377         kthread_t               *t;
 378
 379         /*
 380          * Copyin arguments
 381          */
 382         if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
 383                 return (set_errno(EFAULT));
 384
 385         id = args.id;
 386         idtype = args.idtype;
 387         lgrp = args.lgrp;
 388
 389         /*
 390          * Check for invalid lgroup
 391          */
 392         if (lgrp < 0 || lgrp == LGRP_NONE)
 393                 return (set_errno(EINVAL));
 394
 395         /*
 396          * Check for existing lgroup
 397          */
 398         if (lgrp > lgrp_alloc_max)
 399                 return (set_errno(ESRCH));
 400
 401         /*
 402          * Get lgroup affinity for given LWP or process
 403          */
 404         switch (idtype) {
 405
 406         case P_LWPID:
 407                 /*
 408                  * LWP in current process
 409                  */
 410                 p = curproc;
 411                 mutex_enter(&p->p_lock);
 412                 if (id != P_MYID)       /* different thread */
 413                         aff = lgrp_affinity_get_thread(p, id, lgrp);
 414                 else {                  /* current thread */
 415                         aff = LGRP_AFF_NONE;
 416                         t = curthread;
 417                         thread_lock(t);
 418                         if (t->t_lgrp_affinity)
 419                                 aff = t->t_lgrp_affinity[lgrp];
 420                         thread_unlock(t);
 421                 }
 422                 mutex_exit(&p->p_lock);
 423                 break;
 424
 425         case P_PID:
 426                 /*
 427                  * Process
 428                  */
 429                 mutex_enter(&pidlock);
 430
 431                 if (id == P_MYID)
 432                         p = curproc;
 433                 else {
 434                         p = prfind(id);
 435                         if (p == NULL) {
 436                                 mutex_exit(&pidlock);
 437                                 return (set_errno(ESRCH));
 438                         }
 439                 }
 440
 441                 mutex_enter(&p->p_lock);
 442                 aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
 443                 mutex_exit(&p->p_lock);
 444
 445                 mutex_exit(&pidlock);
 446                 break;
 447
 448         default:
 449                 aff = set_errno(EINVAL);
 450                 break;
 451         }
 452
 453         return (aff);
 454 }
 455
 456
 457 /*
 458  * Find lgroup for which this thread has most affinity in specified partition
 459  * starting from home lgroup unless specified starting lgroup is preferred
 460  */
 461 lpl_t *
 462 lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
 463     boolean_t prefer_start)
 464 {
 465         lgrp_affinity_t *affs;
 466         lgrp_affinity_t best_aff;
 467         lpl_t           *best_lpl;
 468         lgrp_id_t       finish;
 469         lgrp_id_t       home;
 470         lgrp_id_t       lgrpid;
 471         lpl_t           *lpl;
 472
 473         ASSERT(t != NULL);
 474         ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
 475             (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
 476         ASSERT(cpupart != NULL);
 477
 478         if (t->t_lgrp_affinity == NULL)
 479                 return (NULL);
 480
 481         affs = t->t_lgrp_affinity;
 482
 483         /*
 484          * Thread bound to CPU
 485          */
 486         if (t->t_bind_cpu != PBIND_NONE) {
 487                 cpu_t   *cp;
 488
 489                 /*
 490                  * Find which lpl has most affinity among leaf lpl directly
 491                  * containing CPU and its ancestor lpls
 492                  */
 493                 cp = cpu[t->t_bind_cpu];
 494
 495                 best_lpl = lpl = cp->cpu_lpl;
 496                 best_aff = affs[best_lpl->lpl_lgrpid];
 497                 while (lpl->lpl_parent != NULL) {
 498                         lpl = lpl->lpl_parent;
 499                         lgrpid = lpl->lpl_lgrpid;
 500                         if (affs[lgrpid] > best_aff) {
 501                                 best_lpl = lpl;
 502                                 best_aff = affs[lgrpid];
 503                         }
 504                 }
 505                 return (best_lpl);
 506         }
 507
 508         /*
 509          * Start searching from home lgroup unless given starting lgroup is
 510          * preferred or home lgroup isn't in given pset.  Use root lgroup as
 511          * starting point if both home and starting lgroups aren't in given
 512          * pset.
 513          */
 514         ASSERT(start >= 0 && start <= lgrp_alloc_max);
 515         home = t->t_lpl->lpl_lgrpid;
 516         if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
 517                 lgrpid = home;
 518         else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
 519                 lgrpid = start;
 520         else
 521                 lgrpid = LGRP_ROOTID;
 522
 523         best_lpl = &cpupart->cp_lgrploads[lgrpid];
 524         best_aff = affs[lgrpid];
 525         finish = lgrpid;
 526         do {
 527                 /*
 528                  * Skip any lgroups that don't have CPU resources
 529                  * in this processor set.
 530                  */
 531                 if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
 532                         if (++lgrpid > lgrp_alloc_max)
 533                                 lgrpid = 0;     /* wrap the search */
 534                         continue;
 535                 }
 536
 537                 /*
 538                  * Find lgroup with most affinity
 539                  */
 540                 lpl = &cpupart->cp_lgrploads[lgrpid];
 541                 if (affs[lgrpid] > best_aff) {
 542                         best_aff = affs[lgrpid];
 543                         best_lpl = lpl;
 544                 }
 545
 546                 if (++lgrpid > lgrp_alloc_max)
 547                         lgrpid = 0;     /* wrap the search */
 548
 549         } while (lgrpid != finish);
 550
 551         /*
 552          * No lgroup (in this pset) with any affinity
 553          */
 554         if (best_aff == LGRP_AFF_NONE)
 555                 return (NULL);
 556
 557         lgrpid = best_lpl->lpl_lgrpid;
 558         ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);
 559
 560         return (best_lpl);
 561 }
 562
 563
 564 /*
 565  * Set thread's affinity for given lgroup
 566  */
 567 int
 568 lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
 569     lgrp_affinity_t **aff_buf)
 570 {
 571         lgrp_affinity_t *affs;
 572         lgrp_id_t       best;
 573         lpl_t           *best_lpl;
 574         lgrp_id_t       home;
 575         int             retval;
 576
 577         ASSERT(t != NULL);
 578         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 579
 580         retval = 0;
 581
 582         thread_lock(t);
 583
 584         /*
 585          * Check to see whether caller has permission to set affinity for
 586          * thread
 587          */
 588         if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
 589                 thread_unlock(t);
 590                 return (set_errno(EPERM));
 591         }
 592
 593         if (t->t_lgrp_affinity == NULL) {
 594                 if (aff == LGRP_AFF_NONE) {
 595                         thread_unlock(t);
 596                         return (0);
 597                 }
 598                 ASSERT(aff_buf != NULL && *aff_buf != NULL);
 599                 t->t_lgrp_affinity = *aff_buf;
 600                 *aff_buf = NULL;
 601         }
 602
 603         affs = t->t_lgrp_affinity;
 604         affs[lgrp] = aff;
 605
 606         /*
 607          * Find lgroup for which thread has most affinity,
 608          * starting with lgroup for which affinity being set
 609          */
 610         best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);
 611
 612         /*
 613          * Rehome if found lgroup with more affinity than home or lgroup for
 614          * which affinity is being set has same affinity as home
 615          */
 616         home = t->t_lpl->lpl_lgrpid;
 617         if (best_lpl != NULL && best_lpl != t->t_lpl) {
 618                 best = best_lpl->lpl_lgrpid;
 619                 if (affs[best] > affs[home] || (affs[best] == affs[home] &&
 620                     best == lgrp))
 621                         lgrp_move_thread(t, best_lpl, 1);
 622         }
 623
 624         thread_unlock(t);
 625
 626         return (retval);
 627 }
 628
 629
 630 /*
 631  * Set process' affinity for specified lgroup
 632  */
 633 int
 634 lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
 635     lgrp_affinity_t **aff_buf_array)
 636 {
 637         lgrp_affinity_t *buf;
 638         int             err = 0;
 639         int             i;
 640         int             retval;
 641         kthread_t       *t;
 642
 643         ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
 644         ASSERT(aff_buf_array != NULL);
 645
 646         i = 0;
 647         t = p->p_tlist;
 648         if (t != NULL) {
 649                 do {
 650                         /*
 651                          * Set lgroup affinity for thread
 652                          */
 653                         buf = aff_buf_array[i];
 654                         retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);
 655
 656                         if (err == 0 && retval != 0)
 657                                 err = retval;
 658
 659                         /*
 660                          * Advance pointer to next buffer
 661                          */
 662                         if (buf == NULL) {
 663                                 ASSERT(i < p->p_lwpcnt);
 664                                 aff_buf_array[i] = NULL;
 665                                 i++;
 666                         }
 667
 668                 } while ((t = t->t_forw) != p->p_tlist);
 669         }
 670         return (err);
 671 }
 672
 673
 674 /*
 675  * Set LWP's or process' affinity for specified lgroup
 676  *
 677  * When setting affinities, pidlock, process p_lock, and thread_lock()
 678  * need to be held in that order to protect target thread's pset, process,
 * process contents, and thread contents.  thread_lock() does splhigh(),
 * so it ends up having a similar effect to kpreempt_disable() and will
 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
 682  */
 683 int
 684 lgrp_affinity_set(lgrp_affinity_args_t *ap)
 685 {
 686         lgrp_affinity_t         aff;
 687         lgrp_affinity_t         *aff_buf;
 688         lgrp_affinity_args_t    args;
 689         id_t                    id;
 690         idtype_t                idtype;
 691         lgrp_id_t               lgrp;
 692         int                     nthreads;
 693         proc_t                  *p;
 694         int                     retval;
 695
 696         /*
 697          * Copyin arguments
 698          */
 699         if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
 700                 return (set_errno(EFAULT));
 701
 702         idtype = args.idtype;
 703         id = args.id;
 704         lgrp = args.lgrp;
 705         aff = args.aff;
 706
 707         /*
 708          * Check for invalid lgroup
 709          */
 710         if (lgrp < 0 || lgrp == LGRP_NONE)
 711                 return (set_errno(EINVAL));
 712
 713         /*
 714          * Check for existing lgroup
 715          */
 716         if (lgrp > lgrp_alloc_max)
 717                 return (set_errno(ESRCH));
 718
 719         /*
 720          * Check for legal affinity
 721          */
 722         if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
 723             aff != LGRP_AFF_STRONG)
 724                 return (set_errno(EINVAL));
 725
 726         /*
 727          * Must be process or LWP ID
 728          */
 729         if (idtype != P_LWPID && idtype != P_PID)
 730                 return (set_errno(EINVAL));
 731
 732         /*
 733          * Set given LWP's or process' affinity for specified lgroup
 734          */
 735         switch (idtype) {
 736
 737         case P_LWPID:
 738                 /*
 739                  * Allocate memory for thread's lgroup affinities
 740                  * ahead of time w/o holding locks
 741                  */
 742                 aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
 743                     KM_SLEEP);
 744
 745                 p = curproc;
 746
 747                 /*
 748                  * Set affinity for thread
 749                  */
 750                 mutex_enter(&p->p_lock);
 751                 if (id == P_MYID) {             /* current thread */
 752                         retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
 753                             &aff_buf);
 754                 } else if (p->p_tlist == NULL) {
 755                         retval = set_errno(ESRCH);
 756                 } else {                        /* other thread */
 757                         int             found = 0;
 758                         kthread_t       *t;
 759
 760                         t = p->p_tlist;
 761                         do {
 762                                 if (t->t_tid == id) {
 763                                         retval = lgrp_affinity_set_thread(t,
 764                                             lgrp, aff, &aff_buf);
 765                                         found = 1;
 766                                         break;
 767                                 }
 768                         } while ((t = t->t_forw) != p->p_tlist);
 769                         if (!found)
 770                                 retval = set_errno(ESRCH);
 771                 }
 772                 mutex_exit(&p->p_lock);
 773
 774                 /*
 775                  * Free memory for lgroup affinities,
 776                  * since thread didn't need it
 777                  */
 778                 if (aff_buf)
 779                         kmem_free(aff_buf,
 780                             nlgrpsmax * sizeof (lgrp_affinity_t));
 781
 782                 break;
 783
 784         case P_PID:
 785
 786                 do {
 787                         lgrp_affinity_t **aff_buf_array;
 788                         int             i;
 789                         size_t          size;
 790
 791                         /*
 792                          * Get process
 793                          */
 794                         mutex_enter(&pidlock);
 795
 796                         if (id == P_MYID)
 797                                 p = curproc;
 798                         else
 799                                 p = prfind(id);
 800
 801                         if (p == NULL) {
 802                                 mutex_exit(&pidlock);
 803                                 return (set_errno(ESRCH));
 804                         }
 805
 806                         /*
 807                          * Get number of threads in process
 808                          *
 809                          * NOTE: Only care about user processes,
 810                          *       so p_lwpcnt should be number of threads.
 811                          */
 812                         mutex_enter(&p->p_lock);
 813                         nthreads = p->p_lwpcnt;
 814                         mutex_exit(&p->p_lock);
 815
 816                         mutex_exit(&pidlock);
 817
 818                         if (nthreads < 1)
 819                                 return (set_errno(ESRCH));
 820
 821                         /*
 822                          * Preallocate memory for lgroup affinities for
 823                          * each thread in process now to avoid holding
 824                          * any locks.  Allocate an array to hold a buffer
 825                          * for each thread.
 826                          */
 827                         aff_buf_array = kmem_zalloc(nthreads *
 828                             sizeof (lgrp_affinity_t *), KM_SLEEP);
 829
 830                         size = nlgrpsmax * sizeof (lgrp_affinity_t);
 831                         for (i = 0; i < nthreads; i++)
 832                                 aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);
 833
 834                         mutex_enter(&pidlock);
 835
 836                         /*
 837                          * Get process again since dropped locks to allocate
 838                          * memory (except current process)
 839                          */
 840                         if (id != P_MYID)
 841                                 p = prfind(id);
 842
 843                         /*
 844                          * Process went away after we dropped locks and before
 845                          * reacquiring them, so drop locks, free memory, and
 846                          * return.
 847                          */
 848                         if (p == NULL) {
 849                                 mutex_exit(&pidlock);
 850                                 for (i = 0; i < nthreads; i++)
 851                                         kmem_free(aff_buf_array[i], size);
 852                                 kmem_free(aff_buf_array,
 853                                     nthreads * sizeof (lgrp_affinity_t *));
 854                                 return (set_errno(ESRCH));
 855                         }
 856
 857                         mutex_enter(&p->p_lock);
 858
 859                         /*
 860                          * See whether number of threads is same
 861                          * If not, drop locks, free memory, and try again
 862                          */
 863                         if (nthreads != p->p_lwpcnt) {
 864                                 mutex_exit(&p->p_lock);
 865                                 mutex_exit(&pidlock);
 866                                 for (i = 0; i < nthreads; i++)
 867                                         kmem_free(aff_buf_array[i], size);
 868                                 kmem_free(aff_buf_array,
 869                                     nthreads * sizeof (lgrp_affinity_t *));
 870                                 continue;
 871                         }
 872
 873                         /*
 874                          * Set lgroup affinity for threads in process
 875                          */
 876                         retval = lgrp_affinity_set_proc(p, lgrp, aff,
 877                             aff_buf_array);
 878
 879                         mutex_exit(&p->p_lock);
 880                         mutex_exit(&pidlock);
 881
 882                         /*
 883                          * Free any leftover memory, since some threads may
 884                          * have already allocated memory and set lgroup
 885                          * affinities before
 886                          */
 887                         for (i = 0; i < nthreads; i++)
 888                                 if (aff_buf_array[i] != NULL)
 889                                         kmem_free(aff_buf_array[i], size);
 890                         kmem_free(aff_buf_array,
 891                             nthreads * sizeof (lgrp_affinity_t *));
 892
 893                         break;
 894
 895                 } while (nthreads != p->p_lwpcnt);
 896
 897                 break;
 898
 899         default:
 900                 retval = set_errno(EINVAL);
 901                 break;
 902         }
 903
 904         return (retval);
 905 }
 906
 907
 908 /*
 909  * Return the latest generation number for the lgroup hierarchy
 910  * with the given view
 911  */
 912 lgrp_gen_t
 913 lgrp_generation(lgrp_view_t view)
 914 {
 915         cpupart_t       *cpupart;
 916         uint_t          gen;
 917
 918         kpreempt_disable();
 919
 920         /*
 921          * Determine generation number for given view
 922          */
 923         if (view == LGRP_VIEW_OS)
 924                 /*
 925                  * Return generation number of lgroup hierarchy for OS view
 926                  */
 927                 gen = lgrp_gen;
 928         else {
 929                 /*
 930                  * For caller's view, use generation numbers for lgroup
 931                  * hierarchy and caller's pset
 932                  * NOTE: Caller needs to check for change in pset ID
 933                  */
 934                 cpupart = curthread->t_cpupart;
 935                 ASSERT(cpupart);
 936                 gen = lgrp_gen + cpupart->cp_gen;
 937         }
 938
 939         kpreempt_enable();
 940
 941         return (gen);
 942 }
 943
 944
 945 lgrp_id_t
 946 lgrp_home_thread(kthread_t *t)
 947 {
 948         lgrp_id_t       home;
 949
 950         ASSERT(t != NULL);
 951         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 952
 953         thread_lock(t);
 954
 955         /*
 956          * Check to see whether caller has permission to set affinity for
 957          * thread
 958          */
 959         if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
 960                 thread_unlock(t);
 961                 return (set_errno(EPERM));
 962         }
 963
 964         home = lgrp_home_id(t);
 965
 966         thread_unlock(t);
 967         return (home);
 968 }
 969
 970
 971 /*
 972  * Get home lgroup of given process or thread
 973  */
 974 lgrp_id_t
 975 lgrp_home_get(idtype_t idtype, id_t id)
 976 {
 977         proc_t          *p;
 978         lgrp_id_t       retval;
 979         kthread_t       *t;
 980
 981         /*
 982          * Get home lgroup of given LWP or process
 983          */
 984         switch (idtype) {
 985
 986         case P_LWPID:
 987                 p = curproc;
 988
 989                 /*
 990                  * Set affinity for thread
 991                  */
 992                 mutex_enter(&p->p_lock);
 993                 if (id == P_MYID) {             /* current thread */
 994                         retval = lgrp_home_thread(curthread);
 995                 } else if (p->p_tlist == NULL) {
 996                         retval = set_errno(ESRCH);
 997                 } else {                        /* other thread */
 998                         int     found = 0;
 999
1000                         t = p->p_tlist;
1001                         do {
1002                                 if (t->t_tid == id) {
1003                                         retval = lgrp_home_thread(t);
1004                                         found = 1;
1005                                         break;
1006                                 }
1007                         } while ((t = t->t_forw) != p->p_tlist);
1008                         if (!found)
1009                                 retval = set_errno(ESRCH);
1010                 }
1011                 mutex_exit(&p->p_lock);
1012                 break;
1013
1014         case P_PID:
1015                 /*
1016                  * Get process
1017                  */
1018                 mutex_enter(&pidlock);
1019
1020                 if (id == P_MYID)
1021                         p = curproc;
1022                 else
1023                         p = prfind(id);
1024
1025                 if (p == NULL) {
1026                         mutex_exit(&pidlock);
1027                         return (set_errno(ESRCH));
1028                 }
1029
1030                 mutex_enter(&p->p_lock);
1031                 t = p->p_tlist;
1032                 if (t == NULL)
1033                         retval = set_errno(ESRCH);
1034                 else
1035                         retval = lgrp_home_thread(t);
1036                 mutex_exit(&p->p_lock);
1037
1038                 mutex_exit(&pidlock);
1039
1040                 break;
1041
1042         default:
1043                 retval = set_errno(EINVAL);
1044                 break;
1045         }
1046
1047         return (retval);
1048 }
1049
1050
1051 /*
1052  * Return latency between "from" and "to" lgroups
1053  *
1054  * This latency number can only be used for relative comparison
1055  * between lgroups on the running system, cannot be used across platforms,
1056  * and may not reflect the actual latency.  It is platform and implementation
1057  * specific, so platform gets to decide its value.  It would be nice if the
1058  * number was at least proportional to make comparisons more meaningful though.
1059  */
1060 int
1061 lgrp_latency(lgrp_id_t from, lgrp_id_t to)
1062 {
1063         lgrp_t          *from_lgrp;
1064         int             i;
1065         int             latency;
1066         int             latency_max;
1067         lgrp_t          *to_lgrp;
1068
1069         ASSERT(MUTEX_HELD(&cpu_lock));
1070
1071         if (from < 0 || to < 0)
1072                 return (set_errno(EINVAL));
1073
1074         if (from > lgrp_alloc_max || to > lgrp_alloc_max)
1075                 return (set_errno(ESRCH));
1076
1077         from_lgrp = lgrp_table[from];
1078         to_lgrp = lgrp_table[to];
1079
1080         if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
1081                 return (set_errno(ESRCH));
1082         }
1083
1084         /*
1085          * Get latency for same lgroup
1086          */
1087         if (from == to) {
1088                 latency = from_lgrp->lgrp_latency;
1089                 return (latency);
1090         }
1091
1092         /*
1093          * Get latency between leaf lgroups
1094          */
1095         if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
1096                 return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
1097                     to_lgrp->lgrp_plathand));
1098
1099         /*
1100          * Determine max latency between resources in two lgroups
1101          */
1102         latency_max = 0;
1103         for (i = 0; i <= lgrp_alloc_max; i++) {
1104                 lgrp_t  *from_rsrc;
1105                 int     j;
1106                 lgrp_t  *to_rsrc;
1107
1108                 from_rsrc = lgrp_table[i];
1109                 if (!LGRP_EXISTS(from_rsrc) ||
1110                     !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
1111                         continue;
1112
1113                 for (j = 0; j <= lgrp_alloc_max; j++) {
1114                         to_rsrc = lgrp_table[j];
1115                         if (!LGRP_EXISTS(to_rsrc) ||
1116                             klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
1117                             j) == 0)
1118                                 continue;
1119                         latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
1120                             to_rsrc->lgrp_plathand);
1121                         if (latency > latency_max)
1122                                 latency_max = latency;
1123                 }
1124         }
1125         return (latency_max);
1126 }
1127
1128
1129 /*
1130  * Return lgroup interface version number
1131  * 0 - none
1132  * 1 - original
1133  * 2 - lgrp_latency_cookie() and lgrp_resources() added
1134  */
1135 int
1136 lgrp_version(int version)
1137 {
1138         /*
1139          * Return LGRP_VER_NONE when requested version isn't supported
1140          */
1141         if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
1142                 return (LGRP_VER_NONE);
1143
1144         /*
1145          * Return current version when LGRP_VER_NONE passed in
1146          */
1147         if (version == LGRP_VER_NONE)
1148                 return (LGRP_VER_CURRENT);
1149
1150         /*
1151          * Otherwise, return supported version.
1152          */
1153         return (version);
1154 }
1155
1156
1157 /*
1158  * Snapshot of lgroup hierarchy
1159  *
1160  * One snapshot is kept and is based on the kernel's native data model, so
1161  * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
1162  * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
1163  * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
1164  *
1165  * The format is defined by lgroup snapshot header and the layout of
1166  * the snapshot in memory is as follows:
1167  * 1) lgroup snapshot header
1168  *    - specifies format of snapshot
1169  *    - defined by lgrp_snapshot_header_t
1170  * 2) lgroup info array
1171  *    - contains information about each lgroup
1172  *    - one element for each lgroup
1173  *    - each element is defined by lgrp_info_t
1174  * 3) lgroup CPU ID array
1175  *    - contains list (array) of CPU IDs for each lgroup
1176  *    - lgrp_info_t points into array and specifies how many CPUs belong to
1177  *      given lgroup
1178  * 4) lgroup parents array
1179  *    - contains lgroup bitmask of parents for each lgroup
1180  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1181  * 5) lgroup children array
1182  *    - contains lgroup bitmask of children for each lgroup
1183  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1184  * 6) lgroup resources array
1185  *    - contains lgroup bitmask of resources for each lgroup
1186  *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
1187  * 7) lgroup latency table
1188  *    - contains latency from each lgroup to each of other lgroups
1189  *
1190  * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
1191  *        may be sparsely allocated.
1192  */
1193 lgrp_snapshot_header_t  *lgrp_snap = NULL;      /* lgroup snapshot */
1194 static kmutex_t         lgrp_snap_lock;         /* snapshot lock */
1195
1196
1197 /*
1198  * Take a snapshot of lgroup hierarchy and return size of buffer
1199  * needed to hold snapshot
1200  */
1201 static int
1202 lgrp_snapshot(void)
1203 {
1204         size_t          bitmask_size;
1205         size_t          bitmasks_size;
1206         size_t          bufsize;
1207         int             cpu_index;
1208         size_t          cpuids_size;
1209         int             i;
1210         int             j;
1211         size_t          info_size;
1212         size_t          lats_size;
1213         ulong_t         *lgrp_children;
1214         processorid_t   *lgrp_cpuids;
1215         lgrp_info_t     *lgrp_info;
1216         int             **lgrp_lats;
1217         ulong_t         *lgrp_parents;
1218         ulong_t         *lgrp_rsets;
1219         ulong_t         *lgrpset;
1220         int             snap_ncpus;
1221         int             snap_nlgrps;
1222         int             snap_nlgrpsmax;
1223         size_t          snap_hdr_size;
1224 #ifdef  _SYSCALL32_IMPL
1225         model_t         model = DATAMODEL_NATIVE;
1226
1227         /*
1228          * Have up-to-date snapshot, so check to see whether caller is 32-bit
1229          * program and need to return size of 32-bit snapshot now.
1230          */
1231         model = get_udatamodel();
1232         if (model == DATAMODEL_ILP32 && lgrp_snap &&
1233             lgrp_snap->ss_gen == lgrp_gen) {
1234
1235                 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1236
1237                 /*
1238                  * Calculate size of buffer needed for 32-bit snapshot,
1239                  * rounding up size of each object to allow for alignment
1240                  * of next object in buffer.
1241                  */
1242                 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1243                     sizeof (caddr32_t));
1244                 info_size =
1245                     P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1246                     sizeof (processorid_t));
1247                 cpuids_size =
1248                     P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1249                     sizeof (ulong_t));
1250
1251                 /*
1252                  * lgroup bitmasks needed for parents, children, and resources
1253                  * for each lgroup and pset lgroup set
1254                  */
1255                 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1256                 bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1257                     snap_nlgrpsmax) + 1) * bitmask_size;
1258
1259                 /*
1260                  * Size of latency table and buffer
1261                  */
1262                 lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
1263                     snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1264
1265                 bufsize = snap_hdr_size + info_size + cpuids_size +
1266                     bitmasks_size + lats_size;
1267                 return (bufsize);
1268         }
1269 #endif  /* _SYSCALL32_IMPL */
1270
1271         /*
1272          * Check whether snapshot is up-to-date
1273          * Free it and take another one if not
1274          */
1275         if (lgrp_snap) {
1276                 if (lgrp_snap->ss_gen == lgrp_gen)
1277                         return (lgrp_snap->ss_size);
1278
1279                 kmem_free(lgrp_snap, lgrp_snap->ss_size);
1280                 lgrp_snap = NULL;
1281         }
1282
1283         /*
1284          * Allocate memory for snapshot
1285          * w/o holding cpu_lock while waiting for memory
1286          */
1287         while (lgrp_snap == NULL) {
1288                 int     old_generation;
1289
1290                 /*
1291                  * Take snapshot of lgroup generation number
1292                  * and configuration size dependent information
1293                  * NOTE: Only count number of online CPUs,
1294                  * since only online CPUs appear in lgroups.
1295                  */
1296                 mutex_enter(&cpu_lock);
1297                 old_generation = lgrp_gen;
1298                 snap_ncpus = ncpus_online;
1299                 snap_nlgrps = nlgrps;
1300                 snap_nlgrpsmax = nlgrpsmax;
1301                 mutex_exit(&cpu_lock);
1302
1303                 /*
1304                  * Calculate size of buffer needed for snapshot,
1305                  * rounding up size of each object to allow for alignment
1306                  * of next object in buffer.
1307                  */
1308                 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1309                     sizeof (void *));
1310                 info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1311                     sizeof (processorid_t));
1312                 cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1313                     sizeof (ulong_t));
1314                 /*
1315                  * lgroup bitmasks needed for pset lgroup set and  parents,
1316                  * children, and resource sets for each lgroup
1317                  */
1318                 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1319                 bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
1320                     snap_nlgrpsmax) + 1) * bitmask_size;
1321
1322                 /*
1323                  * Size of latency table and buffer
1324                  */
1325                 lats_size = snap_nlgrpsmax * sizeof (int *) +
1326                     snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);
1327
1328                 bufsize = snap_hdr_size + info_size + cpuids_size +
1329                     bitmasks_size + lats_size;
1330
1331                 /*
1332                  * Allocate memory for buffer
1333                  */
1334                 lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
1335                 if (lgrp_snap == NULL)
1336                         return (set_errno(ENOMEM));
1337
1338                 /*
1339                  * Check whether generation number has changed
1340                  */
1341                 mutex_enter(&cpu_lock);
1342                 if (lgrp_gen == old_generation)
1343                         break;          /* hasn't change, so done. */
1344
1345                 /*
1346                  * Generation number changed, so free memory and try again.
1347                  */
1348                 mutex_exit(&cpu_lock);
1349                 kmem_free(lgrp_snap, bufsize);
1350                 lgrp_snap = NULL;
1351         }
1352
1353         /*
1354          * Fill in lgroup snapshot header
1355          * (including pointers to tables of lgroup info, CPU IDs, and parents
1356          * and children)
1357          */
1358         lgrp_snap->ss_version = LGRP_VER_CURRENT;
1359
1360         /*
1361          * XXX For now, liblgrp only needs to know whether the hierarchy
1362          * XXX only has one level or not
1363          */
1364         if (snap_nlgrps == 1)
1365                 lgrp_snap->ss_levels = 1;
1366         else
1367                 lgrp_snap->ss_levels = 2;
1368
1369         lgrp_snap->ss_root = LGRP_ROOTID;
1370
1371         lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
1372         lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
1373         lgrp_snap->ss_ncpus = snap_ncpus;
1374         lgrp_snap->ss_gen = lgrp_gen;
1375         lgrp_snap->ss_view = LGRP_VIEW_OS;
1376         lgrp_snap->ss_pset = 0;         /* NOTE: caller should set if needed */
1377         lgrp_snap->ss_size = bufsize;
1378         lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;
1379
1380         lgrp_snap->ss_info = lgrp_info =
1381             (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1382
1383         lgrp_snap->ss_cpuids = lgrp_cpuids =
1384             (processorid_t *)((uintptr_t)lgrp_info + info_size);
1385
1386         lgrp_snap->ss_lgrpset = lgrpset =
1387             (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);
1388
1389         lgrp_snap->ss_parents = lgrp_parents =
1390             (ulong_t *)((uintptr_t)lgrpset + bitmask_size);
1391
1392         lgrp_snap->ss_children = lgrp_children =
1393             (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
1394             bitmask_size));
1395
1396         lgrp_snap->ss_rsets = lgrp_rsets =
1397             (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
1398             bitmask_size));
1399
1400         lgrp_snap->ss_latencies = lgrp_lats =
1401             (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
1402                 snap_nlgrpsmax * bitmask_size));
1403
1404         /*
1405          * Fill in lgroup information
1406          */
1407         cpu_index = 0;
1408         for (i = 0; i < snap_nlgrpsmax; i++) {
1409                 struct cpu      *cp;
1410                 int             cpu_count;
1411                 struct cpu      *head;
1412                 int             k;
1413                 lgrp_t          *lgrp;
1414
1415                 lgrp = lgrp_table[i];
1416                 if (!LGRP_EXISTS(lgrp)) {
1417                         bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
1418                         lgrp_info[i].info_lgrpid = LGRP_NONE;
1419                         continue;
1420                 }
1421
1422                 lgrp_info[i].info_lgrpid = i;
1423                 lgrp_info[i].info_latency = lgrp->lgrp_latency;
1424
1425                 /*
1426                  * Fill in parents, children, and lgroup resources
1427                  */
1428                 lgrp_info[i].info_parents =
1429                     (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));
1430
1431                 if (lgrp->lgrp_parent)
1432                         BT_SET(lgrp_info[i].info_parents,
1433                             lgrp->lgrp_parent->lgrp_id);
1434
1435                 lgrp_info[i].info_children =
1436                     (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));
1437
1438                 for (j = 0; j < snap_nlgrpsmax; j++)
1439                         if (klgrpset_ismember(lgrp->lgrp_children, j))
1440                                 BT_SET(lgrp_info[i].info_children, j);
1441
1442                 lgrp_info[i].info_rset =
1443                     (ulong_t *)((uintptr_t)lgrp_rsets +
1444                     (i * LGRP_RSRC_COUNT * bitmask_size));
1445
1446                 for (j = 0; j < LGRP_RSRC_COUNT; j++) {
1447                         ulong_t *rset;
1448
1449                         rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
1450                             (j * bitmask_size));
1451                         for (k = 0; k < snap_nlgrpsmax; k++)
1452                                 if (klgrpset_ismember(lgrp->lgrp_set[j], k))
1453                                         BT_SET(rset, k);
1454                 }
1455
1456                 /*
1457                  * Fill in CPU IDs
1458                  */
1459                 cpu_count = 0;
1460                 lgrp_info[i].info_cpuids = NULL;
1461                 cp = head = lgrp->lgrp_cpu;
1462                 if (head != NULL) {
1463                         lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
1464                         do {
1465                                 lgrp_cpuids[cpu_index] = cp->cpu_id;
1466                                 cpu_index++;
1467                                 cpu_count++;
1468                                 cp = cp->cpu_next_lgrp;
1469                         } while (cp != head);
1470                 }
1471                 ASSERT(cpu_count == lgrp->lgrp_cpucnt);
1472                 lgrp_info[i].info_ncpus = cpu_count;
1473
1474                 /*
1475                  * Fill in memory sizes for lgroups that directly contain
1476                  * memory
1477                  */
1478                 if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
1479                         lgrp_info[i].info_mem_free =
1480                             lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1481                         lgrp_info[i].info_mem_install =
1482                             lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
1483                 }
1484
1485                 /*
1486                  * Fill in latency table and buffer
1487                  */
1488                 lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
1489                     sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
1490                 for (j = 0; j < snap_nlgrpsmax; j++) {
1491                         lgrp_t  *to;
1492
1493                         to = lgrp_table[j];
1494                         if (!LGRP_EXISTS(to))
1495                                 continue;
1496                         lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
1497                             to->lgrp_id);
1498                 }
1499         }
1500         ASSERT(cpu_index == snap_ncpus);
1501
1502
1503         mutex_exit(&cpu_lock);
1504
1505 #ifdef  _SYSCALL32_IMPL
1506         /*
1507          * Check to see whether caller is 32-bit program and need to return
1508          * size of 32-bit snapshot now that snapshot has been taken/updated.
1509          * May not have been able to do this earlier if snapshot was out of
1510          * date or didn't exist yet.
1511          */
1512         if (model == DATAMODEL_ILP32) {
1513
1514                 snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1515
1516                 /*
1517                  * Calculate size of buffer needed for 32-bit snapshot,
1518                  * rounding up size of each object to allow for alignment
1519                  * of next object in buffer.
1520                  */
1521                 snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1522                     sizeof (caddr32_t));
1523                 info_size =
1524                     P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1525                     sizeof (processorid_t));
1526                 cpuids_size =
1527                     P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
1528                     sizeof (ulong_t));
1529
1530                 bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1531                 bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
1532                     1) * bitmask_size;
1533
1534
1535                 /*
1536                  * Size of latency table and buffer
1537                  */
1538                 lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1539                     (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1540
1541                 bufsize = snap_hdr_size + info_size + cpuids_size +
1542                     bitmasks_size + lats_size;
1543                 return (bufsize);
1544         }
1545 #endif  /* _SYSCALL32_IMPL */
1546
1547         return (lgrp_snap->ss_size);
1548 }
1549
1550
1551 /*
1552  * Copy snapshot into given user buffer, fix up any pointers in buffer to point
1553  * into user instead of kernel address space, and return size of buffer
1554  * needed to hold snapshot
1555  */
1556 static int
1557 lgrp_snapshot_copy(char *buf, size_t bufsize)
1558 {
1559         size_t                  bitmask_size;
1560         int                     cpu_index;
1561         size_t                  cpuids_size;
1562         int                     i;
1563         size_t                  info_size;
1564         lgrp_info_t             *lgrp_info;
1565         int                     retval;
1566         size_t                  snap_hdr_size;
1567         int                     snap_ncpus;
1568         int                     snap_nlgrpsmax;
1569         lgrp_snapshot_header_t  *user_snap;
1570         lgrp_info_t             *user_info;
1571         lgrp_info_t             *user_info_buffer;
1572         processorid_t           *user_cpuids;
1573         ulong_t                 *user_lgrpset;
1574         ulong_t                 *user_parents;
1575         ulong_t                 *user_children;
1576         int                     **user_lats;
1577         int                     **user_lats_buffer;
1578         ulong_t                 *user_rsets;
1579
1580         if (lgrp_snap == NULL)
1581                 return (0);
1582
1583         if (buf == NULL || bufsize <= 0)
1584                 return (lgrp_snap->ss_size);
1585
1586         /*
1587          * User needs to try getting size of buffer again
1588          * because given buffer size is too small.
1589          * The lgroup hierarchy may have changed after they asked for the size
1590          * but before the snapshot was taken.
1591          */
1592         if (bufsize < lgrp_snap->ss_size)
1593                 return (set_errno(EAGAIN));
1594
1595         snap_ncpus = lgrp_snap->ss_ncpus;
1596         snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1597
1598         /*
1599          * Fill in lgrpset now because caller may have change psets
1600          */
1601         kpreempt_disable();
1602         for (i = 0; i < snap_nlgrpsmax; i++) {
1603                 if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1604                     i)) {
1605                         BT_SET(lgrp_snap->ss_lgrpset, i);
1606                 }
1607         }
1608         kpreempt_enable();
1609
1610         /*
1611          * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
1612          * into user buffer all at once
1613          */
1614         if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
1615                 return (set_errno(EFAULT));
1616
1617         /*
1618          * Round up sizes of lgroup snapshot header and info for alignment
1619          */
1620         snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
1621             sizeof (void *));
1622         info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
1623             sizeof (processorid_t));
1624         cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1625             sizeof (ulong_t));
1626
1627         bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
1628
1629         /*
1630          * Calculate pointers into user buffer for lgroup snapshot header,
1631          * info, and CPU IDs
1632          */
1633         user_snap = (lgrp_snapshot_header_t *)buf;
1634         user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
1635         user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
1636         user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
1637         user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
1638         user_children = (ulong_t *)((uintptr_t)user_parents +
1639             (snap_nlgrpsmax * bitmask_size));
1640         user_rsets = (ulong_t *)((uintptr_t)user_children +
1641             (snap_nlgrpsmax * bitmask_size));
1642         user_lats = (int **)((uintptr_t)user_rsets +
1643             (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));
1644
1645         /*
1646          * Copyout magic number (ie. pointer to beginning of buffer)
1647          */
1648         if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
1649                 return (set_errno(EFAULT));
1650
1651         /*
1652          * Fix up pointers in user buffer to point into user buffer
1653          * not kernel snapshot
1654          */
1655         if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
1656                 return (set_errno(EFAULT));
1657
1658         if (copyout(&user_cpuids, &user_snap->ss_cpuids,
1659             sizeof (user_cpuids)) != 0)
1660                 return (set_errno(EFAULT));
1661
1662         if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
1663             sizeof (user_lgrpset)) != 0)
1664                 return (set_errno(EFAULT));
1665
1666         if (copyout(&user_parents, &user_snap->ss_parents,
1667             sizeof (user_parents)) != 0)
1668                 return (set_errno(EFAULT));
1669
1670         if (copyout(&user_children, &user_snap->ss_children,
1671             sizeof (user_children)) != 0)
1672                 return (set_errno(EFAULT));
1673
1674         if (copyout(&user_rsets, &user_snap->ss_rsets,
1675             sizeof (user_rsets)) != 0)
1676                 return (set_errno(EFAULT));
1677
1678         if (copyout(&user_lats, &user_snap->ss_latencies,
1679             sizeof (user_lats)) != 0)
1680                 return (set_errno(EFAULT));
1681
1682         /*
1683          * Make copies of lgroup info and latency table, fix up pointers,
1684          * and then copy them into user buffer
1685          */
1686         user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
1687         if (user_info_buffer == NULL)
1688                 return (set_errno(ENOMEM));
1689
1690         user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1691             KM_NOSLEEP);
1692         if (user_lats_buffer == NULL) {
1693                 kmem_free(user_info_buffer, info_size);
1694                 return (set_errno(ENOMEM));
1695         }
1696
1697         lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
1698         bcopy(lgrp_info, user_info_buffer, info_size);
1699
1700         cpu_index = 0;
1701         for (i = 0; i < snap_nlgrpsmax; i++) {
1702                 ulong_t *snap_rset;
1703
1704                 /*
1705                  * Skip non-existent lgroups
1706                  */
1707                 if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
1708                         continue;
1709
1710                 /*
1711                  * Update free memory size since it changes frequently
1712                  * Only do so for lgroups directly containing memory
1713                  *
1714                  * NOTE: This must be done before changing the pointers to
1715                  *       point into user space since we need to dereference
1716                  *       lgroup resource set
1717                  */
1718                 snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
1719                     BT_BITOUL(snap_nlgrpsmax)];
1720                 if (BT_TEST(snap_rset, i))
1721                         user_info_buffer[i].info_mem_free =
1722                             lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
1723
1724                 /*
1725                  * Fix up pointers to parents, children, resources, and
1726                  * latencies
1727                  */
1728                 user_info_buffer[i].info_parents =
1729                     (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
1730                 user_info_buffer[i].info_children =
1731                     (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
1732                 user_info_buffer[i].info_rset =
1733                     (ulong_t *)((uintptr_t)user_rsets +
1734                     (i * LGRP_RSRC_COUNT * bitmask_size));
1735                 user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
1736                     (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
1737                     sizeof (int)));
1738
1739                 /*
1740                  * Fix up pointer to CPU IDs
1741                  */
1742                 if (user_info_buffer[i].info_ncpus == 0) {
1743                         user_info_buffer[i].info_cpuids = NULL;
1744                         continue;
1745                 }
1746                 user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
1747                 cpu_index += user_info_buffer[i].info_ncpus;
1748         }
1749         ASSERT(cpu_index == snap_ncpus);
1750
1751         /*
1752          * Copy lgroup info and latency table with pointers fixed up to point
1753          * into user buffer out to user buffer now
1754          */
1755         retval = lgrp_snap->ss_size;
1756         if (copyout(user_info_buffer, user_info, info_size) != 0)
1757                 retval = set_errno(EFAULT);
1758         kmem_free(user_info_buffer, info_size);
1759
1760         if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
1761             sizeof (int *)) != 0)
1762                 retval = set_errno(EFAULT);
1763         kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));
1764
1765         return (retval);
1766 }
1767
1768
1769 #ifdef  _SYSCALL32_IMPL
1770 /*
1771  * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
1772  * into user instead of kernel address space, copy 32-bit snapshot into
1773  * given user buffer, and return size of buffer needed to hold snapshot
1774  */
1775 static int
1776 lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
1777 {
1778         size32_t                        bitmask_size;
1779         size32_t                        bitmasks_size;
1780         size32_t                        children_size;
1781         int                             cpu_index;
1782         size32_t                        cpuids_size;
1783         int                             i;
1784         int                             j;
1785         size32_t                        info_size;
1786         size32_t                        lats_size;
1787         lgrp_info_t                     *lgrp_info;
1788         lgrp_snapshot_header32_t        *lgrp_snap32;
1789         lgrp_info32_t                   *lgrp_info32;
1790         processorid_t                   *lgrp_cpuids32;
1791         caddr32_t                       *lgrp_lats32;
1792         int                             **lgrp_lats32_kernel;
1793         uint_t                          *lgrp_set32;
1794         uint_t                          *lgrp_parents32;
1795         uint_t                          *lgrp_children32;
1796         uint_t                          *lgrp_rsets32;
1797         size32_t                        parents_size;
1798         size32_t                        rsets_size;
1799         size32_t                        set_size;
1800         size32_t                        snap_hdr_size;
1801         int                             snap_ncpus;
1802         int                             snap_nlgrpsmax;
1803         size32_t                        snap_size;
1804
1805         if (lgrp_snap == NULL)
1806                 return (0);
1807
1808         snap_ncpus = lgrp_snap->ss_ncpus;
1809         snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;
1810
1811         /*
1812          * Calculate size of buffer needed for 32-bit snapshot,
1813          * rounding up size of each object to allow for alignment
1814          * of next object in buffer.
1815          */
1816         snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
1817             sizeof (caddr32_t));
1818         info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
1819             sizeof (processorid_t));
1820         cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
1821                     sizeof (ulong_t));
1822
1823         bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);
1824
1825         set_size = bitmask_size;
1826         parents_size = snap_nlgrpsmax * bitmask_size;
1827         children_size = snap_nlgrpsmax * bitmask_size;
1828         rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
1829             (int)bitmask_size, sizeof (caddr32_t));
1830
1831         bitmasks_size = set_size + parents_size + children_size + rsets_size;
1832
1833         /*
1834          * Size of latency table and buffer
1835          */
1836         lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
1837             (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));
1838
1839         snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
1840                 lats_size;
1841
1842         if (buf == NULL || bufsize <= 0) {
1843                 return (snap_size);
1844         }
1845
1846         /*
1847          * User needs to try getting size of buffer again
1848          * because given buffer size is too small.
1849          * The lgroup hierarchy may have changed after they asked for the size
1850          * but before the snapshot was taken.
1851          */
1852         if (bufsize < snap_size)
1853                 return (set_errno(EAGAIN));
1854
1855         /*
1856          * Make 32-bit copy of snapshot, fix up pointers to point into user
1857          * buffer not kernel, and then copy whole thing into user buffer
1858          */
1859         lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
1860         if (lgrp_snap32 == NULL)
1861                 return (set_errno(ENOMEM));
1862
1863         /*
1864          * Calculate pointers into 32-bit copy of snapshot
1865          * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
1866          * resources, and latency table and buffer
1867          */
1868         lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
1869             snap_hdr_size);
1870         lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
1871         lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
1872         lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
1873         lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
1874         lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
1875         lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);
1876
1877         /*
1878          * Make temporary lgroup latency table of pointers for kernel to use
1879          * to fill in rows of table with latencies from each lgroup
1880          */
1881         lgrp_lats32_kernel =  kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
1882             KM_NOSLEEP);
1883         if (lgrp_lats32_kernel == NULL) {
1884                 kmem_free(lgrp_snap32, snap_size);
1885                 return (set_errno(ENOMEM));
1886         }
1887
1888         /*
1889          * Fill in 32-bit lgroup snapshot header
1890          * (with pointers into user's buffer for lgroup info, CPU IDs,
1891          * bit masks, and latencies)
1892          */
1893         lgrp_snap32->ss_version = lgrp_snap->ss_version;
1894         lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
1895         lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
1896             lgrp_snap->ss_nlgrps;
1897         lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
1898         lgrp_snap32->ss_root = lgrp_snap->ss_root;
1899         lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
1900         lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
1901         lgrp_snap32->ss_view = LGRP_VIEW_OS;
1902         lgrp_snap32->ss_size = snap_size;
1903         lgrp_snap32->ss_magic = buf;
1904         lgrp_snap32->ss_info = buf + snap_hdr_size;
1905         lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
1906         lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
1907         lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
1908         lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
1909             (snap_nlgrpsmax * bitmask_size);
1910         lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
1911             (snap_nlgrpsmax * bitmask_size);
1912         lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
1913             (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);
1914
1915         /*
1916          * Fill in lgrpset now because caller may have change psets
1917          */
1918         kpreempt_disable();
1919         for (i = 0; i < snap_nlgrpsmax; i++) {
1920                 if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
1921                     i)) {
1922                         BT_SET32(lgrp_set32, i);
1923                 }
1924         }
1925         kpreempt_enable();
1926
1927         /*
1928          * Fill in 32-bit copy of lgroup info and fix up pointers
1929          * to point into user's buffer instead of kernel's
1930          */
1931         cpu_index = 0;
1932         lgrp_info = lgrp_snap->ss_info;
1933         for (i = 0; i < snap_nlgrpsmax; i++) {
1934                 uint_t  *children;
1935                 uint_t  *lgrp_rset;
1936                 uint_t  *parents;
1937                 ulong_t *snap_rset;
1938
1939                 /*
1940                  * Skip non-existent lgroups
1941                  */
1942                 if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
1943                         bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
1944                         lgrp_info32[i].info_lgrpid = LGRP_NONE;
1945                         continue;
1946                 }
1947
1948                 /*
1949                  * Fill in parents, children, lgroup resource set, and
1950                  * latencies from snapshot
1951                  */
1952                 parents = (uint_t *)((uintptr_t)lgrp_parents32 +
1953                     i * bitmask_size);
1954                 children = (uint_t *)((uintptr_t)lgrp_children32 +
1955                     i * bitmask_size);
1956                 snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
1957                     (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
1958                 lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
1959                     (i * LGRP_RSRC_COUNT * bitmask_size));
1960                 lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
1961                     snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
1962                     sizeof (int));
1963                 for (j = 0; j < snap_nlgrpsmax; j++) {
1964                         int     k;
1965                         uint_t  *rset;
1966
1967                         if (BT_TEST(&lgrp_snap->ss_parents[i], j))
1968                                 BT_SET32(parents, j);
1969
1970                         if (BT_TEST(&lgrp_snap->ss_children[i], j))
1971                                 BT_SET32(children, j);
1972
1973                         for (k = 0; k < LGRP_RSRC_COUNT; k++) {
1974                                 rset = (uint_t *)((uintptr_t)lgrp_rset +
1975                                     k * bitmask_size);
1976                                 if (BT_TEST(&snap_rset[k], j))
1977                                         BT_SET32(rset, j);
1978                         }
1979
1980                         lgrp_lats32_kernel[i][j] =
1981                             lgrp_snap->ss_latencies[i][j];
1982                 }
1983
1984                 /*
1985                  * Fix up pointer to latency buffer
1986                  */
1987                 lgrp_lats32[i] = lgrp_snap32->ss_latencies +
1988                     snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
1989                     sizeof (int);
1990
1991                 /*
1992                  * Fix up pointers for parents, children, and resources
1993                  */
1994                 lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
1995                     (i * bitmask_size);
1996                 lgrp_info32[i].info_children = lgrp_snap32->ss_children +
1997                     (i * bitmask_size);
1998                 lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
1999                     (i * LGRP_RSRC_COUNT * bitmask_size);
2000
2001                 /*
2002                  * Fill in memory and CPU info
2003                  * Only fill in memory for lgroups directly containing memory
2004                  */
2005                 snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
2006                     BT_BITOUL(snap_nlgrpsmax)];
2007                 if (BT_TEST(snap_rset, i)) {
2008                         lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
2009                             LGRP_MEM_SIZE_FREE);
2010                         lgrp_info32[i].info_mem_install =
2011                             lgrp_info[i].info_mem_install;
2012                 }
2013
2014                 lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;
2015
2016                 lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
2017                 lgrp_info32[i].info_latency = lgrp_info[i].info_latency;
2018
2019                 if (lgrp_info32[i].info_ncpus == 0) {
2020                         lgrp_info32[i].info_cpuids = 0;
2021                         continue;
2022                 }
2023
2024                 /*
2025                  * Fix up pointer for CPU IDs
2026                  */
2027                 lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
2028                     (cpu_index * sizeof (processorid_t));
2029                 cpu_index += lgrp_info32[i].info_ncpus;
2030         }
2031         ASSERT(cpu_index == snap_ncpus);
2032
2033         /*
2034          * Copy lgroup CPU IDs into 32-bit snapshot
2035          * before copying it out into user's buffer
2036          */
2037         bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);
2038
2039         /*
2040          * Copy 32-bit lgroup snapshot into user's buffer all at once
2041          */
2042         if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
2043                 kmem_free(lgrp_snap32, snap_size);
2044                 kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
2045                 return (set_errno(EFAULT));
2046         }
2047
2048         kmem_free(lgrp_snap32, snap_size);
2049         kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
2050
2051         return (snap_size);
2052 }
2053 #endif  /* _SYSCALL32_IMPL */
2054
2055
2056 int
2057 lgrpsys(int subcode, long ia, void *ap)
2058 {
2059         size_t  bufsize;
2060         int     latency;
2061
2062         switch (subcode) {
2063
2064         case LGRP_SYS_AFFINITY_GET:
2065                 return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));
2066
2067         case LGRP_SYS_AFFINITY_SET:
2068                 return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));
2069
2070         case LGRP_SYS_GENERATION:
2071                 return (lgrp_generation(ia));
2072
2073         case LGRP_SYS_HOME:
2074                 return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));
2075
2076         case LGRP_SYS_LATENCY:
2077                 mutex_enter(&cpu_lock);
2078                 latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
2079                 mutex_exit(&cpu_lock);
2080                 return (latency);
2081
2082         case LGRP_SYS_MEMINFO:
2083                 return (meminfo(ia, (struct meminfo *)ap));
2084
2085         case LGRP_SYS_VERSION:
2086                 return (lgrp_version(ia));
2087
2088         case LGRP_SYS_SNAPSHOT:
2089                 mutex_enter(&lgrp_snap_lock);
2090                 bufsize = lgrp_snapshot();
2091                 if (ap && ia > 0) {
2092                         if (get_udatamodel() == DATAMODEL_NATIVE)
2093                                 bufsize = lgrp_snapshot_copy(ap, ia);
2094 #ifdef  _SYSCALL32_IMPL
2095                         else
2096                                 bufsize = lgrp_snapshot_copy32(
2097                                     (caddr32_t)(uintptr_t)ap, ia);
2098 #endif  /* _SYSCALL32_IMPL */
2099                 }
2100                 mutex_exit(&lgrp_snap_lock);
2101                 return (bufsize);
2102
2103         default:
2104                 break;
2105
2106         }
2107
2108         return (set_errno(EINVAL));
2109 }