4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/systm.h>
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/thread.h>
30 #include <sys/cpuvar.h>
31 #include <sys/cpupart.h>
32 #include <sys/cmn_err.h>
34 #include <sys/group.h>
35 #include <sys/bitset.h>
/*
 * CMT dispatcher policies
 *
 * This file implements CMT dispatching policies using Processor Groups.
 *
 * The scheduler/dispatcher leverages knowledge of the performance
 * relevant CMT sharing relationships existing between CPUs to implement
 * load balancing and coalescence thread placement policies.
 *
 * Load balancing policy seeks to improve performance by minimizing
 * contention over shared processor resources / facilities. Coalescence
 * policies improve resource utilization and ultimately power efficiency.
 *
 * On NUMA systems, the dispatcher will generally perform load balancing and
 * coalescence within (and not across) lgroups. This is because there isn't
 * much sense in trying to correct an imbalance by sending a thread outside
 * of its home, if it would attempt to return home a short while later.
 * The dispatcher will, however, implement CMT policy across lgroups when
 * the thread is homed to the root lgroup, since root homed threads have
 * no lgroup affinity.
 */
62 * Return non-zero if, given the policy, we should migrate from running
63 * somewhere "here" to somewhere "there".
66 cmt_should_migrate(pg_cmt_t
*here
, pg_cmt_t
*there
, pg_cmt_policy_t policy
,
69 uint32_t here_util
, there_util
;
71 here_util
= here
->cmt_utilization
;
72 there_util
= there
->cmt_utilization
;
75 * This assumes that curthread's utilization is "1"
77 if (self
&& bitset_in_set(&here
->cmt_cpus_actv_set
, CPU
->cpu_seqid
))
78 here_util
--; /* Ignore curthread's effect */
81 * Load balancing and coalescence are conflicting policies
83 ASSERT((policy
& (CMT_BALANCE
|CMT_COALESCE
)) !=
84 (CMT_BALANCE
|CMT_COALESCE
));
86 if (policy
& CMT_BALANCE
) {
90 * If the target is comparatively underutilized
91 * (either in an absolute sense, or scaled by capacity),
92 * then choose to balance.
94 if ((here_util
> there_util
) ||
95 (here_util
== there_util
&&
96 (CMT_CAPACITY(there
) > CMT_CAPACITY(here
)))) {
99 } else if (policy
& CMT_COALESCE
) {
101 * Attempt to drive group utilization up to capacity
103 if (there_util
> here_util
&&
104 there_util
< CMT_CAPACITY(there
))
/*
 * Perform multi-level CMT load balancing of running threads.
 *
 * tp is the thread being enqueued.
 * cp is a hint CPU, against which CMT load balancing will be performed.
 *
 * Returns cp, or a CPU better than cp with respect to balancing
 * running thread load.
 */
120 cmt_balance(kthread_t
*tp
, cpu_t
*cp
)
122 int hint
, i
, cpu
, nsiblings
;
124 group_t
*cmt_pgs
, *siblings
;
125 pg_cmt_t
*pg
, *pg_tmp
, *tpg
= NULL
;
128 extern cmt_lgrp_t
*cmt_root
;
130 ASSERT(THREAD_LOCK_HELD(tp
));
132 cmt_pgs
= &cp
->cpu_pg
->cmt_pgs
;
134 if (GROUP_SIZE(cmt_pgs
) == 0)
135 return (cp
); /* nothing to do */
141 * Balance across siblings in the CPUs CMT lineage
142 * If the thread is homed to the root lgroup, perform
143 * top level balancing against other top level PGs
144 * in the system. Otherwise, start with the default
145 * top level siblings group, which is within the leaf lgroup
147 pg
= GROUP_ACCESS(cmt_pgs
, level
);
148 if (tp
->t_lpl
->lpl_lgrpid
== LGRP_ROOTID
)
149 siblings
= &cmt_root
->cl_pgs
;
151 siblings
= pg
->cmt_siblings
;
154 * Traverse down the lineage until we find a level that needs
155 * balancing, or we get to the end.
158 nsiblings
= GROUP_SIZE(siblings
); /* self inclusive */
162 hint
= CPU_PSEUDO_RANDOM() % nsiblings
;
165 * Find a balancing candidate from among our siblings
166 * "hint" is a hint for where to start looking
170 ASSERT(i
< nsiblings
);
171 pg_tmp
= GROUP_ACCESS(siblings
, i
);
174 * The candidate must not be us, and must
175 * have some CPU resources in the thread's
179 bitset_in_set(&tp
->t_cpupart
->cp_cmt_pgs
,
180 ((pg_t
*)pg_tmp
)->pg_id
)) {
185 if (++i
>= nsiblings
)
190 goto next_level
; /* no candidates at this level */
193 * Decide if we should migrate from the current PG to a
194 * target PG given a policy
196 if (cmt_should_migrate(pg
, tpg
, pg
->cmt_policy
, self
))
201 if (++level
== GROUP_SIZE(cmt_pgs
))
204 pg
= GROUP_ACCESS(cmt_pgs
, level
);
205 siblings
= pg
->cmt_siblings
;
209 uint_t tgt_size
= GROUP_SIZE(&tpg
->cmt_cpus_actv
);
212 * Select an idle CPU from the target
214 hint
= CPU_PSEUDO_RANDOM() % tgt_size
;
217 newcp
= GROUP_ACCESS(&tpg
->cmt_cpus_actv
, cpu
);
218 if (newcp
->cpu_part
== tp
->t_cpupart
&&
219 newcp
->cpu_dispatch_pri
== -1) {
223 if (++cpu
== tgt_size
)
225 } while (cpu
!= hint
);