/*
 * sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"
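
/* Timeslice for SCHED_RR spu contexts: HZ jiffies, i.e. one second. */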
#define SPU_TIMESLICE	(HZ)
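
/*
 * The runqueue mirrors the cpu scheduler's O(1) priority arrays: one
 * list of waiting contexts per priority, plus a bitmap in which a set
 * bit marks a non-empty list, so the highest-priority waiter is found
 * in constant time via sched_find_first_bit().
 */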
struct spu_prio_array {
	DECLARE_BITMAP(bitmap, MAX_PRIO);
	struct list_head runq[MAX_PRIO];
	spinlock_t runq_lock;
	struct list_head active_list[MAX_NUMNODES];
	struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;
static struct workqueue_struct *spu_sched_wq;
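
/**
 * node_allowed - check whether the current task may run on @node
 * @node:	node to check
 *
 * Returns 1 if @node has online cpus and the calling task's affinity
 * mask intersects them, 0 otherwise.
 */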
static inline int node_allowed(int node)
{
	cpumask_t mask;

	if (!nr_cpus_node(node))
		return 0;
	mask = node_to_cpumask(node);
	if (!cpus_intersects(mask, current->cpus_allowed))
		return 0;
	return 1;
}

void spu_start_tick(struct spu_context *ctx)
{
	if (ctx->policy == SCHED_RR)
		queue_delayed_work(spu_sched_wq, &ctx->sched_work,
				   SPU_TIMESLICE);
}

void spu_stop_tick(struct spu_context *ctx)
{
	if (ctx->policy == SCHED_RR)
		cancel_delayed_work(&ctx->sched_work);
}
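
/**
 * spu_sched_tick - timeslice end for a SCHED_RR context
 * @work:	delayed work embedded in the spu context
 *
 * Deactivate the context if a context of equal or higher priority is
 * waiting on the runqueue, otherwise re-arm the timeslice tick.
 */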
void spu_sched_tick(struct work_struct *work)
{
	struct spu_context *ctx =
		container_of(work, struct spu_context, sched_work.work);
	struct spu *spu;
	int rearm = 1;

	mutex_lock(&ctx->state_mutex);
	spu = ctx->spu;
	if (spu) {
		int best = sched_find_first_bit(spu_prio->bitmap);
		if (best <= ctx->prio) {
			spu_deactivate(ctx);
			rearm = 0;
		}
	}
	mutex_unlock(&ctx->state_mutex);

	if (rearm)
		spu_start_tick(ctx);
}

/**
 * spu_add_to_active_list - add spu to active list
 * @spu:	spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
	mutex_lock(&spu_prio->active_mutex[spu->node]);
	list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
	mutex_unlock(&spu_prio->active_mutex[spu->node]);
}

/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu:	spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
	int node = spu->node;

	mutex_lock(&spu_prio->active_mutex[node]);
	list_del_init(&spu->list);
	mutex_unlock(&spu_prio->active_mutex[node]);
}
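
/*
 * Notifier chain called on every context switch.  Subscribers receive
 * the object_id of the incoming context (or 0 when the spu is left
 * idle), which lets external code, e.g. profiling tools, track what is
 * running on each physical spu.
 */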
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
	blocking_notifier_call_chain(&spu_switch_notifier,
			ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block *n)
{
	return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block *n)
{
	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}

/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:	physical spu to bind to
 * @ctx:	context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
		 spu->number, spu->node);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu_associate_mm(spu, ctx->owner);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	spu->dma_callback = spufs_dma_callback;
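	/*
	 * Make sure the callback pointers set up above are visible
	 * before spu_restore() lets the spu raise interrupts that are
	 * delivered through them.
	 */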
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	spu_switch_notify(spu, ctx);
	spu_add_to_active_list(spu);
	ctx->state = SPU_STATE_RUNNABLE;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);

	spu_remove_from_active_list(spu);
	spu_switch_notify(spu, NULL);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu_associate_mm(spu, NULL);
	spu->pid = 0;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->flags = 0;
	spu->ctx = NULL;
}

/**
 * spu_add_to_rq - add a context to the runqueue
 * @ctx:	context to add
 */
static void spu_add_to_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
	set_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_del_from_rq - remove a context from the runqueue
 * @ctx:	context to remove
 */
static void spu_del_from_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_del_init(&ctx->rq);
	if (list_empty(&spu_prio->runq[ctx->prio]))
		clear_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_grab_context - pick one context off the runqueue
 * @prio:	priority of the context to be picked
 *
 * This function returns one context from the runqueue for priority @prio.
 * If there is more than one context with the given priority the first
 * task on the runqueue will be taken.
 *
 * Returns the spu_context, or NULL if the runqueue for @prio is empty.
 *
 * Must be called with spu_prio->runq_lock held.
 */
static struct spu_context *spu_grab_context(int prio)
{
	struct list_head *rq = &spu_prio->runq[prio];

	if (list_empty(rq))
		return NULL;
	return list_entry(rq->next, struct spu_context, rq);
}
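
/**
 * spu_prio_wait - sleep until woken up by spu_reschedule
 * @ctx:	context waiting for an spu
 *
 * Called with ctx->state_mutex held; the mutex is dropped across the
 * sleep so that other paths, e.g. spu_sched_tick(), can take it in the
 * meantime, and is reacquired before returning.  The wakeup side only
 * needs the runqueue lock, see spu_reschedule().
 */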
static void spu_prio_wait(struct spu_context *ctx)
{
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		mutex_unlock(&ctx->state_mutex);
		schedule();
		mutex_lock(&ctx->state_mutex);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->stop_wq, &wait);
}

/**
 * spu_reschedule - try to find a runnable context for a spu
 * @spu:	spu available
 *
 * This function is called whenever a spu becomes idle.  It looks for the
 * most suitable runnable spu context and schedules it for execution.
 */
static void spu_reschedule(struct spu *spu)
{
	int best;

	spu_free(spu);

	spin_lock(&spu_prio->runq_lock);
	best = sched_find_first_bit(spu_prio->bitmap);
	if (best < MAX_PRIO) {
		struct spu_context *ctx = spu_grab_context(best);
		if (ctx)
			wake_up(&ctx->stop_wq);
	}
	spin_unlock(&spu_prio->runq_lock);
}
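
/**
 * spu_get_idle - try to allocate an idle spu for a context
 * @ctx:	spu context to schedule
 *
 * Walks the nodes, starting with the local one, and tries to allocate
 * a free spu from each node the current task is allowed to run on.
 */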
static struct spu *spu_get_idle(struct spu_context *ctx)
{
	struct spu *spu = NULL;
	int node = cpu_to_node(raw_smp_processor_id());
	int n;

	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}
	return spu;
}

/**
 * find_victim - find a lower priority context to preempt
 * @ctx:	candidate context for running
 *
 * Returns the freed physical spu to run the new context on.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
	struct spu_context *victim = NULL;
	struct spu *spu;
	int node, n;

	/*
	 * Look for a possible preemption candidate on the local node first.
	 * If there is no candidate look at the other nodes.  This isn't
	 * exactly fair, but so far the whole spu scheduler tries to keep
	 * a strong node affinity.  We might want to fine-tune this in
	 * the future.
	 */
 restart:
	node = cpu_to_node(raw_smp_processor_id());
	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;

		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
			struct spu_context *tmp = spu->ctx;

			if (tmp->rt_priority < ctx->rt_priority &&
			    (!victim || tmp->rt_priority < victim->rt_priority))
				victim = spu->ctx;
		}
		mutex_unlock(&spu_prio->active_mutex[node]);

		if (victim) {
			/*
			 * This nests ctx->state_mutex, but we always lock
			 * higher priority contexts before lower priority
			 * ones, so this is safe until we introduce
			 * priority inheritance schemes.
			 */
			if (!mutex_trylock(&victim->state_mutex)) {
				victim = NULL;
				goto restart;
			}

			spu = victim->spu;
			if (!spu) {
				/*
				 * This race can happen because we've dropped
				 * the active list mutex.  Not a problem, just
				 * restart the search.
				 */
				mutex_unlock(&victim->state_mutex);
				victim = NULL;
				goto restart;
			}
			spu_unbind_context(spu, victim);
			mutex_unlock(&victim->state_mutex);
			return spu;
		}
	}

	return NULL;
}

/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx:	spu context to schedule
 * @flags:	flags (currently ignored)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available
 * add the context to the runqueue so it gets woken up once an spu
 * is available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
	if (ctx->spu)
		return 0;

	do {
		struct spu *spu;

		spu = spu_get_idle(ctx);
		/*
		 * If this is a realtime thread we try to get it running by
		 * preempting a lower priority thread.
		 */
		if (!spu && ctx->rt_priority)
			spu = find_victim(ctx);
		if (spu) {
			spu_bind_context(spu, ctx);
			return 0;
		}
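
		/*
		 * No spu was available: queue the context before going
		 * to sleep so a concurrent spu_reschedule() can find it
		 * and wake it, and dequeue it again after waking up (or
		 * when a signal ends the wait).
		 */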
		spu_add_to_rq(ctx);
		spu_prio_wait(ctx);
		spu_del_from_rq(ctx);
	} while (!signal_pending(current));

	return -ERESTARTSYS;
}

/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and schedule
 * the highest priority context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu = ctx->spu;

	if (spu) {
		spu_unbind_context(spu, ctx);
		spu_reschedule(spu);
	}
}

/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * Check if there is another context waiting on the runqueue and if yes
 * unbind @ctx from the physical spu and schedule the highest
 * priority context to run on the freed physical spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;
	int need_yield = 0;

	if (mutex_trylock(&ctx->state_mutex)) {
		if ((spu = ctx->spu) != NULL) {
			int best = sched_find_first_bit(spu_prio->bitmap);
			if (best < MAX_PRIO) {
				pr_debug("%s: yielding SPU %d NODE %d\n",
					 __FUNCTION__, spu->number, spu->node);
				spu_deactivate(ctx);
				need_yield = 1;
			}
		}
		mutex_unlock(&ctx->state_mutex);
	}
	if (need_yield)
		yield();
}

int __init spu_sched_init(void)
{
	int i;

	spu_sched_wq = create_singlethread_workqueue("spusched");
	if (!spu_sched_wq)
		return 1;

	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
	if (!spu_prio) {
		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
		       __FUNCTION__);
		destroy_workqueue(spu_sched_wq);
		return 1;
	}
	for (i = 0; i < MAX_PRIO; i++) {
		INIT_LIST_HEAD(&spu_prio->runq[i]);
		__clear_bit(i, spu_prio->bitmap);
	}
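	/*
	 * Sentinel bit: with no contexts queued sched_find_first_bit()
	 * stops here and returns MAX_PRIO, which callers treat as an
	 * empty runqueue.
	 */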
	__set_bit(MAX_PRIO, spu_prio->bitmap);
	for (i = 0; i < MAX_NUMNODES; i++) {
		mutex_init(&spu_prio->active_mutex[i]);
		INIT_LIST_HEAD(&spu_prio->active_list[i]);
	}
	spin_lock_init(&spu_prio->runq_lock);
	return 0;
}

void __exit spu_sched_exit(void)
{
	struct spu *spu, *tmp;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
					 list) {
			list_del_init(&spu->list);
			spu_free(spu);
		}
		mutex_unlock(&spu_prio->active_mutex[node]);
	}
	kfree(spu_prio);
	destroy_workqueue(spu_sched_wq);
}