/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
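
/* Number of nodes each node-level sched_domain spans (the node itself plus its closest neighbours). */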
#define SD_NODES_PER_DOMAIN 6

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t __devinit sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);
		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int __devinit cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
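/*
 * Map a cpu to its physical-package group: with SMT, all siblings share
 * the first sibling's group; otherwise each cpu is its own group.
 */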
static int __devinit cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * The init_sched_build_groups can't handle what we want to do with node
 * groups, so roll our own. Now each node has its own list of groups which
 * gets dynamically allocated.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int __devinit cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void __devinit arch_init_sched_domains(void)
{
	int i;
	cpumask_t cpu_default_map;

	/*
	 * Setup mask for cpus without special case scheduling requirements.
	 * For now this just excludes isolated cpus, but could be used to
	 * exclude other special cases in the future.
	 */
	cpus_complement(cpu_default_map, cpu_isolated_map);
	cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);

	/*
	 * Set up domains. Isolated domains just stay on the dummy domain.
	 */
	for_each_cpu_mask(i, cpu_default_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, cpu_default_map);

#ifdef CONFIG_NUMA
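		/*
		 * When the machine has more online cpus than a single
		 * node-level domain can cover, hang an extra "allnodes"
		 * domain spanning every non-isolated cpu above it.
		 */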
		if (num_online_cpus()
				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = cpu_default_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, cpu_default_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, cpu_default_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, cpu_default_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, cpu_default_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
	init_sched_build_groups(sched_group_allnodes, cpu_default_map,
				&cpu_to_allnodes_group);

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, cpu_default_map);
		if (cpus_empty(nodemask))
			continue;

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, cpu_default_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;
			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			"Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, cpu_default_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				"Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
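		/*
		 * Close the ring: the last group links back to the node's
		 * first group, so the per-node group list is circular.
		 */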
		prev->next = sched_group_nodes[i];
	}
#endif

	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, cpu_default_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif
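
		/*
		 * Each physical-package group gets SCHED_LOAD_SCALE plus
		 * about 10% of SCHED_LOAD_SCALE for every additional cpu
		 * in its cpumask.
		 */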
		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
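	/*
	 * A node-level group's power is the sum of the power of the
	 * physical packages it spans; walk each node's ring of groups
	 * and accumulate it.
	 */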
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_online_cpu(i) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

void __devinit arch_destroy_sched_domains(void)
{
#ifdef CONFIG_NUMA
	int i;
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *oldsg, *sg = sched_group_nodes[i];
		if (sg == NULL)
			continue;
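		/*
		 * Free every group on this node's circular list; the list
		 * head (sched_group_nodes[i]) is freed last, when the walk
		 * wraps back around to it.
		 */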
		sg = sg->next;
next_sg:
		oldsg = sg;
		sg = sg->next;
		kfree(oldsg);
		if (oldsg != sched_group_nodes[i])
			goto next_sg;
		sched_group_nodes[i] = NULL;
	}
#endif
}