/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
 */
25 #include <sys/types.h>
26 #include <sys/systm.h>
27 #include <sys/cmn_err.h>
28 #include <sys/cpuvar.h>
29 #include <sys/thread.h>
32 #include <sys/debug.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cpupart.h>
36 #include <sys/modctl.h>
37 #include <sys/syscall.h>
39 #include <sys/loadavg.h>
42 #include <sys/pool_pset.h>
43 #include <sys/policy.h>
45 #include <sys/contract/process_impl.h>
47 static int pset(int, long, long, long, long);
49 static struct sysent pset_sysent
= {
51 SE_ARGC
| SE_NOUNLOAD
,
55 static struct modlsys modlsys
= {
56 &mod_syscallops
, "processor sets", &pset_sysent
#ifdef _SYSCALL32_IMPL
/* 32-bit compatibility linkage; reuses the same sysent entry. */
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit pset(2) syscall", &pset_sysent
};
#endif
65 static struct modlinkage modlinkage
= {
68 #ifdef _SYSCALL32_IMPL
74 #define PSET_BADATTR(attr) ((~PSET_NOESCAPE) & (attr))
79 return (mod_install(&modlinkage
));
83 _info(struct modinfo
*modinfop
)
85 return (mod_info(&modlinkage
, modinfop
));
89 pset_create(psetid_t
*psetp
)
94 if (secpolicy_pset(CRED()) != 0)
95 return (set_errno(EPERM
));
98 if (pool_state
== POOL_ENABLED
) {
100 return (set_errno(ENOTSUP
));
102 error
= cpupart_create(&newpset
);
105 return (set_errno(error
));
107 if (copyout(&newpset
, psetp
, sizeof (psetid_t
)) != 0) {
108 (void) cpupart_destroy(newpset
);
110 return (set_errno(EFAULT
));
117 pset_destroy(psetid_t pset
)
121 if (secpolicy_pset(CRED()) != 0)
122 return (set_errno(EPERM
));
125 if (pool_state
== POOL_ENABLED
) {
127 return (set_errno(ENOTSUP
));
129 error
= cpupart_destroy(pset
);
132 return (set_errno(error
));
138 pset_assign(psetid_t pset
, processorid_t cpuid
, psetid_t
*opset
, int forced
)
144 if (pset
!= PS_QUERY
&& secpolicy_pset(CRED()) != 0)
145 return (set_errno(EPERM
));
148 if (pset
!= PS_QUERY
&& pool_state
== POOL_ENABLED
) {
150 return (set_errno(ENOTSUP
));
153 mutex_enter(&cpu_lock
);
154 if ((cp
= cpu_get(cpuid
)) == NULL
) {
155 mutex_exit(&cpu_lock
);
157 return (set_errno(EINVAL
));
160 oldpset
= cpupart_query_cpu(cp
);
162 if (pset
!= PS_QUERY
)
163 error
= cpupart_attach_cpu(pset
, cp
, forced
);
164 mutex_exit(&cpu_lock
);
168 return (set_errno(error
));
171 if (copyout(&oldpset
, opset
, sizeof (psetid_t
)) != 0)
172 return (set_errno(EFAULT
));
178 pset_info(psetid_t pset
, int *typep
, uint_t
*numcpusp
,
179 processorid_t
*cpulistp
)
182 uint_t user_ncpus
= 0, real_ncpus
, copy_ncpus
;
183 processorid_t
*pset_cpus
= NULL
;
186 if (numcpusp
!= NULL
) {
187 if (copyin(numcpusp
, &user_ncpus
, sizeof (uint_t
)) != 0)
188 return (set_errno(EFAULT
));
191 if (user_ncpus
> max_ncpus
) /* sanity check */
192 user_ncpus
= max_ncpus
;
193 if (user_ncpus
!= 0 && cpulistp
!= NULL
)
194 pset_cpus
= kmem_alloc(sizeof (processorid_t
) * user_ncpus
,
197 real_ncpus
= user_ncpus
;
198 if ((error
= cpupart_get_cpus(&pset
, pset_cpus
, &real_ncpus
)) != 0)
202 * Now copyout the information about this processor set.
206 * Get number of cpus to copy back. If the user didn't pass in
207 * a big enough buffer, only copy back as many cpus as fits in
208 * the buffer but copy back the real number of cpus.
211 if (user_ncpus
!= 0 && cpulistp
!= NULL
) {
212 copy_ncpus
= MIN(real_ncpus
, user_ncpus
);
213 if (copyout(pset_cpus
, cpulistp
,
214 sizeof (processorid_t
) * copy_ncpus
) != 0) {
219 if (pset_cpus
!= NULL
)
220 kmem_free(pset_cpus
, sizeof (processorid_t
) * user_ncpus
);
225 pset_type
= PS_PRIVATE
;
226 if (copyout(&pset_type
, typep
, sizeof (int)) != 0)
227 return (set_errno(EFAULT
));
229 if (numcpusp
!= NULL
)
230 if (copyout(&real_ncpus
, numcpusp
, sizeof (uint_t
)) != 0)
231 return (set_errno(EFAULT
));
235 if (pset_cpus
!= NULL
)
236 kmem_free(pset_cpus
, sizeof (processorid_t
) * user_ncpus
);
237 return (set_errno(error
));
241 pset_bind_thread(kthread_t
*tp
, psetid_t pset
, psetid_t
*oldpset
, void *projbuf
,
246 ASSERT(pool_lock_held());
247 ASSERT(MUTEX_HELD(&cpu_lock
));
248 ASSERT(MUTEX_HELD(&ttoproc(tp
)->p_lock
));
250 *oldpset
= tp
->t_bind_pset
;
254 TB_PSET_SOFT_SET(tp
);
258 TB_PSET_HARD_SET(tp
);
265 *oldpset
= TB_PSET_IS_SOFT(tp
) ? PS_SOFT
: PS_HARD
;
270 * Must have the same UID as the target process or
271 * have PRIV_PROC_OWNER privilege.
273 if (!hasprocperm(tp
->t_cred
, CRED()))
276 * Unbinding of an unbound thread should always succeed.
278 if (*oldpset
== PS_NONE
&& pset
== PS_NONE
)
281 * Only privileged processes can move threads from psets with
282 * PSET_NOESCAPE attribute.
284 if ((tp
->t_cpupart
->cp_attr
& PSET_NOESCAPE
) &&
285 secpolicy_pbind(CRED()) != 0)
287 if ((error
= cpupart_bind_thread(tp
, pset
, 0,
288 projbuf
, zonebuf
)) == 0)
289 tp
->t_bind_pset
= pset
;
298 pset_bind_process(proc_t
*pp
, psetid_t pset
, psetid_t
*oldpset
, void *projbuf
,
304 /* skip kernel processes */
305 if ((pset
!= PS_QUERY
) && pp
->p_flag
& SSYS
) {
310 mutex_enter(&pp
->p_lock
);
316 rval
= pset_bind_thread(tp
, pset
, oldpset
, projbuf
,
320 } while ((tp
= tp
->t_forw
) != pp
->p_tlist
);
323 mutex_exit(&pp
->p_lock
);
329 pset_bind_task(task_t
*tk
, psetid_t pset
, psetid_t
*oldpset
, void *projbuf
,
335 ASSERT(MUTEX_HELD(&pidlock
));
337 if ((pp
= tk
->tk_memb_list
) == NULL
) {
344 if (!(pp
->p_flag
& SSYS
)) {
345 rval
= pset_bind_process(pp
, pset
, oldpset
, projbuf
,
350 } while ((pp
= pp
->p_tasknext
) != tk
->tk_memb_list
);
356 pset_bind_project(kproject_t
*kpj
, psetid_t pset
, psetid_t
*oldpset
,
357 void *projbuf
, void *zonebuf
)
362 ASSERT(MUTEX_HELD(&pidlock
));
364 for (pp
= practive
; pp
!= NULL
; pp
= pp
->p_next
) {
365 if (pp
->p_tlist
== NULL
)
367 if (pp
->p_task
->tk_proj
== kpj
&& !(pp
->p_flag
& SSYS
)) {
370 rval
= pset_bind_process(pp
, pset
, oldpset
, projbuf
,
381 pset_bind_zone(zone_t
*zptr
, psetid_t pset
, psetid_t
*oldpset
, void *projbuf
,
387 ASSERT(MUTEX_HELD(&pidlock
));
389 for (pp
= practive
; pp
!= NULL
; pp
= pp
->p_next
) {
390 if (pp
->p_zone
== zptr
&& !(pp
->p_flag
& SSYS
)) {
393 rval
= pset_bind_process(pp
, pset
, oldpset
, projbuf
,
404 * Unbind all threads from the specified processor set, or from all
408 pset_unbind(psetid_t pset
, void *projbuf
, void *zonebuf
, idtype_t idtype
)
416 ASSERT(MUTEX_HELD(&cpu_lock
));
418 if (idtype
== P_PSETID
&& cpupart_find(pset
) == NULL
)
421 mutex_enter(&pidlock
);
422 for (pp
= practive
; pp
!= NULL
; pp
= pp
->p_next
) {
423 mutex_enter(&pp
->p_lock
);
426 * Skip zombies and kernel processes, and processes in
427 * other zones, if called from a non-global zone.
429 if (tp
== NULL
|| (pp
->p_flag
& SSYS
) ||
430 !HASZONEACCESS(curproc
, pp
->p_zone
->zone_id
)) {
431 mutex_exit(&pp
->p_lock
);
435 if ((idtype
== P_PSETID
&& tp
->t_bind_pset
!= pset
) ||
436 (idtype
== P_ALL
&& tp
->t_bind_pset
== PS_NONE
))
438 rval
= pset_bind_thread(tp
, PS_NONE
, &olbind
,
442 } while ((tp
= tp
->t_forw
) != pp
->p_tlist
);
443 mutex_exit(&pp
->p_lock
);
445 mutex_exit(&pidlock
);
450 pset_bind_contract(cont_process_t
*ctp
, psetid_t pset
, psetid_t
*oldpset
,
451 void *projbuf
, void *zonebuf
)
456 ASSERT(MUTEX_HELD(&pidlock
));
458 for (pp
= practive
; pp
!= NULL
; pp
= pp
->p_next
) {
459 if (pp
->p_ct_process
== ctp
) {
462 rval
= pset_bind_process(pp
, pset
, oldpset
, projbuf
,
473 * Bind the lwp:id of process:pid to processor set: pset
476 pset_bind_lwp(psetid_t pset
, id_t id
, pid_t pid
, psetid_t
*opset
)
481 void *projbuf
, *zonebuf
;
485 mutex_enter(&cpu_lock
);
486 projbuf
= fss_allocbuf(FSS_NPROJ_BUF
, FSS_ALLOC_PROJ
);
487 zonebuf
= fss_allocbuf(FSS_NPROJ_BUF
, FSS_ALLOC_ZONE
);
489 mutex_enter(&pidlock
);
490 if ((pid
== P_MYID
&& id
== P_MYID
) ||
491 (pid
== curproc
->p_pid
&& id
== P_MYID
)) {
494 mutex_enter(&pp
->p_lock
);
498 } else if ((pp
= prfind(pid
)) == NULL
) {
502 if (pp
!= curproc
&& id
== P_MYID
) {
506 mutex_enter(&pp
->p_lock
);
507 if ((tp
= idtot(pp
, id
)) == NULL
) {
508 mutex_exit(&pp
->p_lock
);
514 error
= pset_bind_thread(tp
, pset
, &oldpset
, projbuf
, zonebuf
);
515 mutex_exit(&pp
->p_lock
);
517 mutex_exit(&pidlock
);
519 fss_freebuf(projbuf
, FSS_ALLOC_PROJ
);
520 fss_freebuf(zonebuf
, FSS_ALLOC_ZONE
);
521 mutex_exit(&cpu_lock
);
524 if (copyout(&oldpset
, opset
, sizeof (psetid_t
)) != 0)
525 return (set_errno(EFAULT
));
528 return (set_errno(error
));
533 pset_bind(psetid_t pset
, idtype_t idtype
, id_t id
, psetid_t
*opset
)
543 void *projbuf
, *zonebuf
;
546 if ((pset
!= PS_QUERY
) && (pset
!= PS_SOFT
) &&
547 (pset
!= PS_HARD
) && (pset
!= PS_QUERY_TYPE
)) {
549 * Check if the set actually exists before checking
550 * permissions. This is the historical error
551 * precedence. Note that if pset was PS_MYID, the
552 * cpupart_get_cpus call will change it to the
553 * processor set id of the caller (or PS_NONE if the
554 * caller is not bound to a processor set).
556 if (pool_state
== POOL_ENABLED
) {
558 return (set_errno(ENOTSUP
));
560 if (cpupart_get_cpus(&pset
, NULL
, NULL
) != 0) {
562 return (set_errno(EINVAL
));
563 } else if (pset
!= PS_NONE
&& secpolicy_pbind(CRED()) != 0) {
565 return (set_errno(EPERM
));
570 * Pre-allocate enough buffers for FSS for all active projects
571 * and for all active zones on the system. Unused buffers will
572 * be freed later by fss_freebuf().
574 mutex_enter(&cpu_lock
);
575 projbuf
= fss_allocbuf(FSS_NPROJ_BUF
, FSS_ALLOC_PROJ
);
576 zonebuf
= fss_allocbuf(FSS_NPROJ_BUF
, FSS_ALLOC_ZONE
);
581 mutex_enter(&pidlock
);
582 mutex_enter(&pp
->p_lock
);
586 if ((tp
= idtot(pp
, id
)) == NULL
) {
587 mutex_exit(&pp
->p_lock
);
588 mutex_exit(&pidlock
);
593 error
= pset_bind_thread(tp
, pset
, &oldpset
, projbuf
, zonebuf
);
594 mutex_exit(&pp
->p_lock
);
595 mutex_exit(&pidlock
);
599 mutex_enter(&pidlock
);
602 } else if ((pp
= prfind(id
)) == NULL
) {
603 mutex_exit(&pidlock
);
607 error
= pset_bind_process(pp
, pset
, &oldpset
, projbuf
, zonebuf
);
608 mutex_exit(&pidlock
);
612 mutex_enter(&pidlock
);
614 id
= curproc
->p_task
->tk_tkid
;
615 if ((tk
= task_hold_by_id(id
)) == NULL
) {
616 mutex_exit(&pidlock
);
620 error
= pset_bind_task(tk
, pset
, &oldpset
, projbuf
, zonebuf
);
621 mutex_exit(&pidlock
);
629 if ((kpj
= project_hold_by_id(id
, pp
->p_zone
,
630 PROJECT_HOLD_FIND
)) == NULL
) {
634 mutex_enter(&pidlock
);
635 error
= pset_bind_project(kpj
, pset
, &oldpset
, projbuf
,
637 mutex_exit(&pidlock
);
644 if ((zptr
= zone_find_by_id(id
)) == NULL
) {
648 mutex_enter(&pidlock
);
649 error
= pset_bind_zone(zptr
, pset
, &oldpset
, projbuf
, zonebuf
);
650 mutex_exit(&pidlock
);
656 id
= PRCTID(curproc
);
657 if ((ct
= contract_type_ptr(process_type
, id
,
658 curproc
->p_zone
->zone_uniqid
)) == NULL
) {
662 mutex_enter(&pidlock
);
663 error
= pset_bind_contract(ct
->ct_data
, pset
, &oldpset
, projbuf
,
665 mutex_exit(&pidlock
);
670 if (id
== P_MYID
|| pset
!= PS_NONE
|| !INGLOBALZONE(curproc
)) {
674 error
= pset_unbind(id
, projbuf
, zonebuf
, idtype
);
678 if (id
== P_MYID
|| pset
!= PS_NONE
|| !INGLOBALZONE(curproc
)) {
682 error
= pset_unbind(PS_NONE
, projbuf
, zonebuf
, idtype
);
690 fss_freebuf(projbuf
, FSS_ALLOC_PROJ
);
691 fss_freebuf(zonebuf
, FSS_ALLOC_ZONE
);
692 mutex_exit(&cpu_lock
);
696 return (set_errno(error
));
698 if (copyout(&oldpset
, opset
, sizeof (psetid_t
)) != 0)
699 return (set_errno(EFAULT
));
705 * Report load average statistics for the specified processor set.
708 pset_getloadavg(psetid_t pset
, int *buf
, int nelem
)
710 int loadbuf
[LOADAVG_NSTATS
];
714 return (set_errno(EINVAL
));
717 * We keep the same number of load average statistics for processor
718 * sets as we do for the system as a whole.
720 if (nelem
> LOADAVG_NSTATS
)
721 nelem
= LOADAVG_NSTATS
;
723 mutex_enter(&cpu_lock
);
724 error
= cpupart_get_loadavg(pset
, loadbuf
, nelem
);
725 mutex_exit(&cpu_lock
);
726 if (!error
&& nelem
&& copyout(loadbuf
, buf
, nelem
* sizeof (int)) != 0)
730 return (set_errno(error
));
737 * Return list of active processor sets, up to a maximum indicated by
738 * numpsets. The total number of processor sets is stored in the
739 * location pointed to by numpsets.
742 pset_list(psetid_t
*psetlist
, uint_t
*numpsets
)
744 uint_t user_npsets
= 0;
746 psetid_t
*psets
= NULL
;
749 if (numpsets
!= NULL
) {
750 if (copyin(numpsets
, &user_npsets
, sizeof (uint_t
)) != 0)
751 return (set_errno(EFAULT
));
755 * Get the list of all processor sets. First we need to find
756 * out how many there are, so we can allocate a large enough
759 mutex_enter(&cpu_lock
);
760 if (!INGLOBALZONE(curproc
) && pool_pset_enabled()) {
761 psetid_t psetid
= zone_pset_get(curproc
->p_zone
);
763 if (psetid
== PS_NONE
) {
767 psets
= kmem_alloc(real_npsets
* sizeof (psetid_t
),
772 real_npsets
= cpupart_list(0, 0, CP_ALL
);
774 psets
= kmem_alloc(real_npsets
* sizeof (psetid_t
),
776 (void) cpupart_list(psets
, real_npsets
, CP_ALL
);
779 mutex_exit(&cpu_lock
);
781 if (user_npsets
> real_npsets
)
782 user_npsets
= real_npsets
;
784 if (numpsets
!= NULL
) {
785 if (copyout(&real_npsets
, numpsets
, sizeof (uint_t
)) != 0)
787 else if (psetlist
!= NULL
&& user_npsets
!= 0) {
788 if (copyout(psets
, psetlist
,
789 user_npsets
* sizeof (psetid_t
)) != 0)
795 kmem_free(psets
, real_npsets
* sizeof (psetid_t
));
798 return (set_errno(error
));
804 pset_setattr(psetid_t pset
, uint_t attr
)
808 if (secpolicy_pset(CRED()) != 0)
809 return (set_errno(EPERM
));
811 if (pool_state
== POOL_ENABLED
) {
813 return (set_errno(ENOTSUP
));
815 if (pset
== PS_QUERY
|| PSET_BADATTR(attr
)) {
817 return (set_errno(EINVAL
));
819 if ((error
= cpupart_setattr(pset
, attr
)) != 0) {
821 return (set_errno(error
));
828 pset_getattr(psetid_t pset
, uint_t
*attrp
)
833 if (pset
== PS_QUERY
)
834 return (set_errno(EINVAL
));
835 if ((error
= cpupart_getattr(pset
, &attr
)) != 0)
836 return (set_errno(error
));
837 if (copyout(&attr
, attrp
, sizeof (uint_t
)) != 0)
838 return (set_errno(EFAULT
));
843 pset(int subcode
, long arg1
, long arg2
, long arg3
, long arg4
)
847 return (pset_create((psetid_t
*)arg1
));
849 return (pset_destroy((psetid_t
)arg1
));
851 return (pset_assign((psetid_t
)arg1
,
852 (processorid_t
)arg2
, (psetid_t
*)arg3
, 0));
854 return (pset_info((psetid_t
)arg1
, (int *)arg2
,
855 (uint_t
*)arg3
, (processorid_t
*)arg4
));
857 return (pset_bind((psetid_t
)arg1
, (idtype_t
)arg2
,
858 (id_t
)arg3
, (psetid_t
*)arg4
));
860 return (pset_bind_lwp((psetid_t
)arg1
, (id_t
)arg2
,
861 (pid_t
)arg3
, (psetid_t
*)arg4
));
862 case PSET_GETLOADAVG
:
863 return (pset_getloadavg((psetid_t
)arg1
, (int *)arg2
,
866 return (pset_list((psetid_t
*)arg1
, (uint_t
*)arg2
));
868 return (pset_setattr((psetid_t
)arg1
, (uint_t
)arg2
));
870 return (pset_getattr((psetid_t
)arg1
, (uint_t
*)arg2
));
871 case PSET_ASSIGN_FORCED
:
872 return (pset_assign((psetid_t
)arg1
,
873 (processorid_t
)arg2
, (psetid_t
*)arg3
, 1));
875 return (set_errno(EINVAL
));